Loading packages
#===============================================================================
#BTC.LineZero.Header.1.1.0
#===============================================================================
#R Markdown environment setup and reporting utility.
#===============================================================================
#RLB.Dependencies:
# knitr, magrittr, pacman, rio, rmarkdown, rmdformats, tibble, yaml
#===============================================================================
#Input for document parameters, libraries, file paths, and options.
#=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=
# Hide messages and warnings emitted by code chunks in the knitted output.
knitr::opts_chunk$set(message = FALSE, warning = FALSE)

# Project directory; FUN.LineZero.Boot() uses it as the knitr root directory.
# (An earlier duplicate assignment with a trailing slash was overwritten by
# this one and has been removed.)
path_working <- "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git"

# R library location handed to .libPaths() by FUN.LineZero.Boot().
path_library <- "/Library/Frameworks/R.framework/Resources/library"

# Packages attached by FUN.LineZero.Boot() and listed by FUN.LineZero.Report().
# Each package appears once; "ggpubr" and "gridExtra" used to be listed twice,
# which made the environment report print them twice.
str_libraries <- c(
  "readxl", "phyloseq", "tidyverse", "pacman", "yaml", "ggplot2", "vegan",
  "microbiome", "ggpubr", "viridis", "decontam", "gridExtra", "lme4",
  "lmerTest", "writexl", "harrietr", "Maaslin2", "ggtext", "ggpmisc",
  "gamm4", "reshape2", "kableExtra", "knitr", "ggtree", "car", "mediation"
)

# Document header text; it is only echoed by FUN.LineZero.Report().
YAML_header <-
'---
title: "Host-DNA depletion analysis"
author: "Minsik Kim"
date: "2032.08.31"
output:
rmdformats::downcute:
downcute_theme: "chaos"
code_folding: hide
fig_width: 6
fig_height: 6
---'

# Seed passed to set.seed() by FUN.LineZero.Boot().
seed <- "20230427"
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Loads libraries, file paths, and other document options.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Boots the analysis session.
#
# Reads the globals `path_library`, `path_working`, `str_libraries` and `seed`;
# takes no arguments. Side effects: sets the library search path, attaches the
# reporting toolchain and every package named in `str_libraries`, sets the
# knitr root directory and seeds the random number generator.
FUN.LineZero.Boot <- function() {
  .libPaths(path_library)
  # library() stops with an error when pacman is missing; require() would only
  # return FALSE and let the failure surface later in a less obvious place.
  library(pacman)
  # p_load() captures its `...` arguments unevaluated as package names, so a
  # character vector must go through `char =` (as the second call already did).
  pacman::p_load(char = c("knitr", "rmarkdown", "rmdformats", "yaml"))
  knitr::opts_knit$set(root.dir = path_working)
  # De-duplicated, sorted copy used for loading only; the global
  # `str_libraries` is left untouched.
  libraries_to_load <- sort(unique(str_libraries))
  pacman::p_load(char = libraries_to_load)
  set.seed(seed)
}
FUN.LineZero.Boot()
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#Outputs R environment report.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Writes a plain-text description of the session to the console: R version,
# the version of every package in `str_libraries`, operating system, library
# and working paths, seed and the YAML header text.
# Reads the globals `str_libraries`, `path_library`, `path_working`, `seed`
# and `YAML_header`; takes no arguments.
FUN.LineZero.Report <- function() {
  cat("Line Zero Environment:\n\n")
  cat(paste("R:", pacman::p_version(), "\n"))
  cat("Libraries:\n")
  # One line per entry, in the order the entries appear in `str_libraries`.
  for (pkg in str_libraries) {
    cat(paste0(" ", pkg, ": ", pacman::p_version(package = pkg), "\n"))
  }
  cat(paste("\nOperating System:", pacman::p_detectOS(), "\n"))
  cat(paste(" Library Path:", path_library, "\n"))
  cat(paste(" Working Path:", path_working, "\n"))
  cat(paste("Seed:", seed, "\n\n"))
  cat("YAML Header:\n")
  cat(YAML_header)
}
FUN.LineZero.Report()
## Line Zero Environment:
##
## R: 4.3.1
## Libraries:
## readxl: 1.4.3
## phyloseq: 1.44.0
## tidyverse: 2.0.0
## pacman: 0.5.1
## yaml: 2.3.7
## ggplot2: 3.4.2
## vegan: 2.6.4
## microbiome: 1.22.0
## ggpubr: 0.6.0
## viridis: 0.6.4
## decontam: 1.20.0
## gridExtra: 2.3
## ggpubr: 0.6.0
## lme4: 1.1.34
## lmerTest: 3.1.3
## writexl: 1.4.2
## harrietr: 0.2.3
## Maaslin2: 1.14.1
## ggtext: 0.1.2
## ggpmisc: 0.5.3
## gridExtra: 2.3
## gamm4: 0.2.6
## reshape2: 1.4.4
## kableExtra: 1.3.4
## knitr: 1.43
## ggtree: 3.8.2
## car: 3.1.2
## mediation: 4.5.0
##
## Operating System: Darwin
## Library Path: /Library/Frameworks/R.framework/Resources/library
## Working Path: /Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git
## Seed: 20230427
##
## YAML Header:
## ---
## title: "Host-DNA depletion analysis"
## author: "Minsik Kim"
## date: "2032.08.31"
## output:
## rmdformats::downcute:
## downcute_theme: "chaos"
## code_folding: hide
## fig_width: 6
## fig_height: 6
## ---
1. Loading data
1.1. phyloseq object
1.2. qPCR data (controls)
LIST OF PRIMARY QUESTIONS AND CORRESPONDING ANALYSES
STUDY AIMS
Aim 1. What is the efficiency of host depletion for each method?
• % host DNA measured by mNGS
o Sequencing failure rates
• % host DNA measured by qPCR
Aim 2. Did host depletion change microbial community composition?
• Alpha diversity (microbial species richness, microbial predicted function richness)
• Beta diversity (Morisita-Horn distance compared to control (not host-depleted) sample)
• Differential abundance
Aim 3: Is there effect modification by sample type?
Aim 4. Does host depletion increase the risk of contamination?
Aim 1: What is the efficiency of host depletion for each method?
1a. Did treatment change % host DNA?
Figure
A: Raw reads (do you really need to log10 transform y-axis?)
B: Host mapped reads
C: Final reads (QC’d, non-human)
D: % Host DNA
Statistical model
### linear mixed effects model to account for repeated measures (multiple aliquots per individual)
### Outcome = % host DNA (mNGS reads mapping to human genome/QC’d reads)
### Predictors
Host depletion method (categorical, comparison group = control not host-depleted)
Sample type
### testing for interaction term to justify stratified analysis
### % host DNA ~ method + sample_type + method*sample_type + (1|subjid), report interaction p-value.
### Stratified analysis
### %host ~ method + (1|subjid), report beta [95% CI], p-value for each sample type
1b. Did host depletion work successfully for sequencing?
Statistical Model
### Logistic mixed effects model
### Outcome = library prep and sequencing failure rate
### (==1 if failed library prep, ==0 if not)
### Report in text sequencing failure (n) stratified by sample type and treatment method.
## testing for interaction term to justify stratified analysis
### fail ~ method + sample_type + method*sample_type + (1|subjid), report interaction p-value.
### stratified analysis:
### fail ~ method + (1|subjid), report OR [95% CI], p-value for each sample type
Final reads
### Investigate distribution of final reads and apply proper transformation.
Figure
A: Raw reads
B: Host mapped reads
C: Final reads
D: Host DNA ratio
model (to justify stratified analysis)
### final reads ~ method + sample_type + method*sample_type + (1|subjid), just report interaction p-value.
model (stratified analysis)
# final reads ~ method + (1|subjid), report effect size [95% CI], p-value for each sample type.
Aim 2. Did host depletion change microbial community composition?
Figure
# Alpha diversity. Species richness by treatment, facetted by sample type
# Beta diversity. Morisita-Horn distance of each treated compared to untreated sample
# forest plot - x-axis as distance (size of bias) and y axis as treatment. Facet data by sample type
Alpha diversity
# Linear mixed effects model
## Outcome
### species richness
## inverse simpson
# stratified analyses by sample type
## report the significant method*sample_type interaction term to justify stratified analysis)
## species_richness ~ method + sample_type + method * sample_type + (1|subjid)
## InvSimp~ sample type + treatment + sample type * treatment + (1|subject_id)
Beta diversity
### Outcome = Morisita Horn
# PERMANOVA
## Overall: MH ~ sample type + treatment + sample type * treatment, strata = subject_id )
## Stratified: MH ~ method + strata = subject_id
# Calculate Morisita-Horn distance between control and host depleted sample and use that as an outcome in linear model
## Change in MH ~ method + sample_type + method*sample_type + log10(reads))
Differential abundance of species
# Outcome = relative abundance of species
Figure
# Volcano plot with Mock, BAL, Nasal and Sputum
## Note: Mock community placed in Zymo DNA/RNA Protect prior to freezing. Zymo DNA/RNA protect is a mild detergent thus increases susceptibility of microbial cells to lysis and is not recommended to put these samples through host depletion
Statistical model
# MaAsLin2
## Stratified by sample type
### Species ~ + lyPMA + Benzonase + Host zero + Molysis + QIAamp + (1|subjid). Comparison group is untreated sample
## Figure: Balloon plots for BAL, Nasal and Sputum (Figure). Add q-val, mean relative abundance
Proportion gram negative
# Species collapsed into single category: gram negative bacteria, gram positive bacteria, fungi
# Statistical model
## % gram negative ~ sample type + treatment + sample type * treatment + (1|subject_id) )
## Stratified analysis
### % gram negative ~ treatment + (1|subject_id) )
Does the change differ by sample type for predicted function as well? Repeat all of the above analyses using predicted microbial function (KEGG, CPM) instead.
Aim 3: Is there effect modification by sample type?
All above analyses with interaction term and stratified analyses
If effect modification is present, which treatment is the best for each sample type?
Make a summary result by treatment, stratified by sample type
### Issues from sequencing (library failure, no change in host depletion, etc.)
### Changes in alpha diversity (Mean species richness change by each subject)
### Changes in beta diversity (statistical test results with significant factor)
Secondary analyses
1. Is qPCR an alternative to mNGS for estimating % Host DNA?
Figure
### A: Correlation Plot. x-axis with host DNA proportion measured with shotgun metagenomic sequencing vs y-axis as that by qPCR.
### B: Bland-Altman – x-axis as size of observation vs y-axis difference at that scale.
Statistics
### Correlation coefficient
### Bland-Altman statistics
2. Mediation analysis. Host depletion treatment increases species and predicted microbial function richness due to higher effective sequencing depth
# Mediation R package
## outcome = species richness
## exposure = each host depletion treatment compared to untreated control
## mediator = final reads
## mediator-outcome confounders = sample type
## exposure-mediator confounders = NA
## outcome model = Mixed effects linear regression
## mediator model = Mixed effects linear regression
Aim 4. Does host depletion increase the risk of contamination?
Were there any contaminants in the sequencing result? If species richness were increased after treatment, is it due to increased coverage with higher final reads?
# Run decontam (Davis 2018. PMID: 30558668) ## List potential contaminants with their prevalences in samples and negative controls ## Sensitivity analysis where the identified potential contaminant species are removed ### species richness ~ treatment + (1|subjid). # Decontaminate data by estimating microbial population using mock community data. ## Run Tinyvamp (arXiv: 2204.12733) ## Make adjusted relative abundance table by calculating taxon-specific detection efficiencies, relative to a reference taxon (Enterococcus). ## Known community for efficiency estimation: negative (no taxa) + positive (mock community)
Data inputs
Meta data
qPCR - bacteria
qPCR - human
qPCR host %
Raw reads
final reads
sequencing host %
library prep failure status
Raw reads
subject_id
treatment
sample_type
subject_id
Sequencing result
samples
controls
Analysis preparation
Analysis prep
Loading data
# Load data ---------------------------------------------------------------
# Tidy phyloseq bundle; its elements (e.g. `phyloseq_count`) are accessed
# with `$` further down.
phyloseq_unfiltered <- read_rds(
  "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/4_Data/2_Tidy/Phyloseq/PHY_20230521_MGK_host_tidy.rds"
)
# Sample metadata taken from the count-based phyloseq object.
sample_data <- phyloseq_unfiltered$phyloseq_count %>%
  sample_data()
Tinyvamp results
# Tinyvamp estimates --------------------------------------------------------
# Directory holding one `<treatment>_p_hats_all_v3.RDS` file per treatment.
tinyvamp_dir <- "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/4_Data/2_Tidy/HOST_tinyvamp_decontaminated_Amy_Willis/20230807_amy"

# Reads the estimate file for one treatment and transposes it.
#   treatment : file-name prefix, e.g. "untreated" or "hostzero"
# The transposed table is later passed to otu_table(taxa_are_rows = TRUE), so
# the stored files are presumably samples x taxa -- confirm.
read_tinyvamp <- function(treatment) {
  file.path(tinyvamp_dir, paste0(treatment, "_p_hats_all_v3.RDS")) %>%
    read_rds() %>%
    t()
}

tinyvamp_untreated <- read_tinyvamp("untreated")
tinyvamp_lypma <- read_tinyvamp("lypma")
tinyvamp_benzonase <- read_tinyvamp("benzonase")
tinyvamp_host_zero <- read_tinyvamp("hostzero")
tinyvamp_molysis <- read_tinyvamp("molysis")
tinyvamp_qiaamp <- read_tinyvamp("qiaamp")

# Sample metadata for BAL, nasal and sputum samples, re-keyed as
# "<original_sample>_<treatment>".
sample_data_tv <- phyloseq_unfiltered$phyloseq_rel %>%
  subset_samples(sample_type %in% c("BAL", "Nasal", "Sputum")) %>%
  sample_data() %>%
  data.frame() %>%
  mutate("names" = paste(.$original_sample, .$treatment, sep = "_")) %>%
  remove_rownames() %>%
  column_to_rownames("names") %>%
  sample_data()

# Pairs one treatment's estimates with the shared sample metadata.
as_tinyvamp_phyloseq <- function(estimates) {
  merge_phyloseq(
    sample_data_tv,
    otu_table(estimates, taxa_are_rows = TRUE)
  )
}

# Combine all treatments and attach the taxonomy of the relative-abundance
# phyloseq object.
phyloseq_tv <- merge_phyloseq(
  as_tinyvamp_phyloseq(tinyvamp_untreated),
  as_tinyvamp_phyloseq(tinyvamp_lypma),
  as_tinyvamp_phyloseq(tinyvamp_benzonase),
  as_tinyvamp_phyloseq(tinyvamp_host_zero),
  as_tinyvamp_phyloseq(tinyvamp_molysis),
  as_tinyvamp_phyloseq(tinyvamp_qiaamp),
  tax_table(phyloseq_unfiltered$phyloseq_rel)
)
Alpha diversity indices
# Computes alpha-diversity indices per sample and joins them to the sample
# metadata.
#
#   data : a phyloseq object. The abundance table is transposed before it is
#          handed to vegan, i.e. taxa are assumed to be rows -- confirm for
#          every object this is applied to.
#
# Returns a data.frame: the sample metadata of `data` plus the columns
# S.obs (observed richness), data_shannon, data_hill (exp of Shannon),
# data_invsimpson, data_evenness (Shannon / log richness) and the columns
# produced by microbiome::dominance(). Samples with a zero column sum are
# excluded from the index calculation and therefore carry NA in these columns
# (the merge keeps all rows).
alpha_diversity <- function(data) {
  abundance <- otu_table(data)
  # Drop samples (columns) with no counts. The comma matters: without it the
  # per-sample mask is applied to the rows (taxa) by phyloseq's `[` method.
  abundance <- abundance[, colSums(abundance) != 0]
  samples_by_taxa <- t(abundance)

  S.obs <- rowSums(samples_by_taxa != 0)
  data_shannon <- vegan::diversity(samples_by_taxa, index = "shannon")
  # Evenness: Shannon index divided by log of the number of observed taxa.
  data_evenness <- data_shannon / log(vegan::specnumber(samples_by_taxa))
  data_hill <- exp(data_shannon)
  # Dominance indices from the microbiome package (all available indices).
  data_dominance <- microbiome::dominance(
    abundance,
    index = "all", rank = 1, aggregate = TRUE
  )
  # Inverse Simpson index.
  data_invsimpson <- vegan::diversity(samples_by_taxa, index = "invsimpson")

  # Variable names double as column names in the combined table.
  indices <- cbind(
    S.obs, data_shannon, data_hill, data_invsimpson, data_evenness,
    data_dominance
  )
  # Join on row names (sample IDs); all = TRUE keeps samples without indices.
  merge(data.frame(sample_data(data)), indices, by = 0, all = TRUE) %>%
    column_to_rownames(var = "Row.names")
}
# Copy of the bundle taken before the alpha-diversity columns are added.
phyloseq <- phyloseq_unfiltered
# Replace the sample metadata of each listed element with its metadata plus
# the alpha-diversity indices computed from that same element.
for (element in c("phyloseq_rel", "phyloseq_count", "phyloseq_path_rpk")) {
  sample_data(phyloseq_unfiltered[[element]]) <-
    phyloseq_unfiltered[[element]] %>%
    alpha_diversity() %>%
    sample_data()
}
rm(element)
3.1. Screening of treatment effect
i. Did treatment change host % in qPCR results?
qPCR and sequencing
Fig. S1. qPCR figure
Figure S1. Host depletion effects measured by qPCR. (A) total DNA (16S bacterial DNA + human), (B) host DNA, (C) bacterial DNA, and (D) proportion of host DNA.
# Figure S1: host-depletion effects measured by qPCR ------------------------

# Restrict to the three sample types shown in the figure.
qpcr_samples <- sample_data %>%
  subset(., .$sample_type %in% c("BAL", "Nasal", "Sputum"))

# Builds one panel: jittered points plus mean +/- 1 SD per treatment, faceted
# by sample type.
#   data    : sample table with `treatment`, `sample_type` and the columns
#             referenced by `y`
#   y       : unquoted expression plotted on the y-axis
#   y_label : y-axis title (HTML/markdown, rendered by element_markdown())
#   tag     : panel tag, e.g. "A"
# The `lwd = 0.2` previously passed to geom_jitter() was dropped: under the
# ggplot2 version listed in the environment report (3.4.2) it maps to
# `linewidth`, which point layers do not use.
plot_qpcr_panel <- function(data, y, y_label, tag) {
  ggplot(data, aes(x = treatment, y = {{ y }}, colour = treatment)) +
    geom_jitter() +
    stat_summary(
      fun.data = "mean_sdl", fun.args = list(mult = 1),
      geom = "pointrange", size = 0.4
    ) +
    theme_classic(base_size = 12, base_family = "sans") +
    # ColorBrewer "Paired" colours; labels are assigned in the order of the
    # levels of `treatment`.
    scale_color_manual(
      values = c(
        "#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"
      ),
      name = "Treatment",
      labels = c(
        "Control", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"
      )
    ) +
    scale_x_discrete(name = "Treatment") +
    theme(
      axis.title.y = element_markdown(),
      plot.tag = element_text(size = 15),
      axis.text.x = element_blank()
    ) +
    facet_wrap(~sample_type, scales = "free_x") +
    ylab(y_label) +
    labs(tag = tag)
}

# A: total DNA (host + bacterial) by qPCR
fS2a <- plot_qpcr_panel(
  qpcr_samples, log10(DNA_host_nondil + DNA_bac_nondil),
  "log<sub>10</sub>(qPCR total DNA)<br>(ng/μL)", "A"
)
# B: host DNA by qPCR
fS2b <- plot_qpcr_panel(
  qpcr_samples, log10(DNA_host_nondil),
  "log<sub>10</sub>(qPCR host DNA)<br>(ng/μL)", "B"
)
# C: bacterial (16S) DNA by qPCR
fS2c <- plot_qpcr_panel(
  qpcr_samples, log10(DNA_bac_nondil),
  "log<sub>10</sub>(qPCR bacterial DNA)<br>(ng/μL)", "C"
)
# D: host DNA proportion by qPCR
fS2d <- plot_qpcr_panel(
  qpcr_samples, host_proportion,
  "Host DNA ratio", "D"
)

# Output for markdown
figureS2 <- ggarrange(
  fS2a, fS2b, fS2c, fS2d,
  common.legend = TRUE, align = "hv"
)
figureS2

# Save to file; note that the object `figureS2` is written to FigureS1.png.
png(
  filename = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS1.png",
  width = 180, # width in mm (see `units`)
  height = 170, # height in mm (see `units`)
  units = "mm",
  res = 600
)
figureS2
dev.off()
## quartz_off_screen
## 2
Fig. S2. sequencing results
Fig. S2. (A) final reads after removing low quality reads and host-mapped reads and (B) sum of MetaPhlAn mapped reads by sample type.
# Figure S2: read depth per sample -------------------------------------------

# Sample table plus `total_read`, the column sums of the count table.
# Assumes the rows of `sample_data` are in the same order as the columns of
# the count table -- TODO confirm.
read_depth_samples <- sample_data %>%
  data.frame() %>%
  mutate(
    total_read = phyloseq_unfiltered$phyloseq_count %>%
      otu_table() %>%
      colSums()
  )

# Builds one panel: log10(reads + 1) per sample, samples ordered by
# decreasing read number and coloured by sample type.
#   data    : sample table with `baylor_other_id`, `sample_type` and `reads`
#   reads   : unquoted column holding the read counts
#   y_label : y-axis title (HTML/markdown, rendered by element_markdown())
#   tag     : panel tag, e.g. "A"
plot_read_depth <- function(data, reads, y_label, tag) {
  ggplot(
    data,
    aes(
      x = reorder(baylor_other_id, -{{ reads }}),
      y = log10({{ reads }} + 1),
      col = sample_type
    )
  ) +
    geom_point() +
    theme_classic(base_family = "sans") +
    theme(
      axis.title.y = element_markdown(),
      axis.text.x = element_blank()
    ) +
    ylab(y_label) +
    xlab("Samples") +
    guides(col = guide_legend(title = "Sample type")) +
    scale_color_brewer(type = "qual", palette = 6) +
    labs(tag = tag) +
    ylim(c(0, 8.5))
}

# A: final reads
figS1_final_reads <- plot_read_depth(
  read_depth_samples, Final_reads,
  "log<sub>10</sub>(Final reads)", "A"
)
# B: sum of MetaPhlAn-mapped reads
figS1_total_reads <- plot_read_depth(
  read_depth_samples, total_read,
  "log<sub>10</sub>(Sum of MetaPhlan mapped reads)", "B"
)

figS1 <- ggarrange(
  figS1_final_reads, figS1_total_reads,
  ncol = 2, common.legend = TRUE
)
figS1

# Save to file; note that the object `figS1` is written to FigureS2.png.
png(
  filename = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS2.png",
  width = 180, # width in mm (see `units`)
  height = 90, # height in mm (see `units`)
  units = "mm",
  res = 600
)
figS1
dev.off()
## quartz_off_screen
## 2
ii. How were changes in sequencing results?
Changes in sequencing output
Table 1 is generated later (after QCing to remove sequencing failed samples, etc.)
Library failure
Library failure - OR (all samples)
Stratified OR cannot be calculated, as some samples showed 0 library failure.
Effect size, standard error (SE) and t-value at a statistical test on library prep failure rate using generalized linear mixed effect model. glm ( sequencing fail ~ sample_type + treatment + sample_type * treatment + (1|subject_id) )
# Logistic mixed-effects model of library-prep failure (0/1) on sample type,
# treatment and their interaction, with a random intercept per subject.
# `family = binomial` is required: without it glmer() fits a linear mixed
# model (the previous run printed "Linear mixed model fit by REML"), and the
# exponentiated coefficients are then not odds ratios.
# NOTE(review): some sample-type/treatment cells have no failures, so expect
# separation and possibly convergence warnings -- inspect before reporting.
gm1 <- glmer(
  lib_failed ~ sample_type + treatment + sample_type * treatment +
    (1 | subject_id),
  data = sample_data %>%
    data.frame() %>%
    dplyr::filter(sample_type %in% c("Sputum", "Nasal", "BAL")),
  family = binomial(link = "logit")
)
# Odds-ratio calculation follows
# https://stackoverflow.com/questions/26417005/odds-ratio-and-confidence-intervals-from-glmer-output
gm1
## Linear mixed model fit by REML ['lmerMod']
## Formula: lib_failed ~ sample_type + treatment + sample_type * treatment +
## (1 | subject_id)
## Data: sample_data %>% data.frame %>% dplyr::filter(sample_type %in%
## c("Sputum", "Nasal", "BAL"))
## REML criterion at convergence: 40.3974
## Random effects:
## Groups Name Std.Dev.
## subject_id (Intercept) 0.05658
## Residual 0.25374
## Number of obs: 95, groups: subject_id, 20
## Fixed Effects:
## (Intercept) sample_typeNasal
## -8.148e-17 -5.220e-17
## sample_typeSputum treatmentlyPMA
## -4.491e-17 2.000e-01
## treatmentBenzonase treatmentHost zero
## 1.276e-16 2.000e-01
## treatmentMolysis treatmentQIAamp
## 2.000e-01 1.146e-16
## sample_typeNasal:treatmentlyPMA sample_typeSputum:treatmentlyPMA
## 6.037e-01 -2.000e-01
## sample_typeNasal:treatmentBenzonase sample_typeSputum:treatmentBenzonase
## -9.147e-03 -3.925e-17
## sample_typeNasal:treatmentHost zero sample_typeSputum:treatmentHost zero
## 1.909e-01 -2.000e-01
## sample_typeNasal:treatmentMolysis sample_typeSputum:treatmentMolysis
## 5.963e-01 -2.000e-01
## sample_typeNasal:treatmentQIAamp sample_typeSputum:treatmentQIAamp
## 9.147e-03 3.469e-17
# Confidence intervals for the fixed effects of gm1 ("beta_" selects the
# fixed-effect parameters).
cc <- confint(gm1,parm="beta_") ## slow (~ 11 seconds)
# Point estimates next to their interval bounds.
ctab <- cbind(OR=fixef(gm1), cc)
# Exponentiate estimates and bounds.
# NOTE(review): these are odds ratios only if gm1 is a logit-link fit; the
# model summary printed in this document reports a linear mixed model
# (lmerMod) -- confirm the model family before interpreting this table.
rtab <- exp(ctab)
# Approach taken from:
#https://stackoverflow.com/questions/26417005/odds-ratio-and-confidence-intervals-from-glmer-output
rtab
## OR 2.5 % 97.5 %
## (Intercept) 1.0000000 0.8127810 1.230344
## sample_typeNasal 1.0000000 0.7757836 1.289019
## sample_typeSputum 1.0000000 0.7459048 1.340654
## treatmentlyPMA 1.2214028 0.9168368 1.627143
## treatmentBenzonase 1.0000000 0.7506425 1.332192
## treatmentHost zero 1.2214028 0.9168368 1.627143
## treatmentMolysis 1.2214028 0.9168368 1.627143
## treatmentQIAamp 1.0000000 0.7506425 1.332192
## sample_typeNasal:treatmentlyPMA 1.8288993 1.2488966 2.673698
## sample_typeSputum:treatmentlyPMA 0.8187308 0.5457289 1.228302
## sample_typeNasal:treatmentBenzonase 0.9908949 0.6777626 1.454511
## sample_typeSputum:treatmentBenzonase 1.0000000 0.6665548 1.500252
## sample_typeNasal:treatmentHost zero 1.2102817 0.8278211 1.776543
## sample_typeSputum:treatmentHost zero 0.8187308 0.5457289 1.228302
## sample_typeNasal:treatmentMolysis 1.8153634 1.2417694 2.658440
## sample_typeSputum:treatmentMolysis 0.8187308 0.5457289 1.228302
## sample_typeNasal:treatmentQIAamp 1.0091888 0.6875166 1.475443
## sample_typeSputum:treatmentQIAamp 1.0000000 0.6665548 1.500252
# HTML table of odds ratios with 95% confidence intervals built from `rtab`
# (columns OR, "2.5 %", "97.5 %"; one row per model term).
tableA1 <- rtab %>%
  data.frame(check.names = FALSE) %>%
  mutate(
    # Flag intervals that exclude 1 on either side; previously only
    # intervals entirely above 1 were flagged.
    ` ` = case_when(
      `2.5 %` > 1 | `97.5 %` < 1 ~ "*",
      .default = " "
    ),
    # Fixed three decimals so trailing zeros are kept ("0.813-1.230").
    `95% CI` = sprintf("%.3f-%.3f", `2.5 %`, `97.5 %`),
    OR = round(OR, 3)
  ) %>%
  rownames_to_column(var = "x") %>%
  # Strip variable-name prefixes and show interactions as "A * B".
  mutate(
    x = gsub("sample_type|treatment", "", x),
    x = gsub(":", " * ", x)
  ) %>%
  column_to_rownames(var = "x") %>%
  dplyr::select(c("OR", "95% CI", " ")) %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
tableA1
| OR | 95% CI | ||
|---|---|---|---|
| (Intercept) | 1.000 | 0.813-1.23 | |
| Nasal | 1.000 | 0.776-1.289 | |
| Sputum | 1.000 | 0.746-1.341 | |
| lyPMA | 1.221 | 0.917-1.627 | |
| Benzonase | 1.000 | 0.751-1.332 | |
| Host zero | 1.221 | 0.917-1.627 | |
| Molysis | 1.221 | 0.917-1.627 | |
| QIAamp | 1.000 | 0.751-1.332 | |
| Nasal * lyPMA | 1.829 | 1.249-2.674 |
|
| Sputum * lyPMA | 0.819 | 0.546-1.228 | |
| Nasal * Benzonase | 0.991 | 0.678-1.455 | |
| Sputum * Benzonase | 1.000 | 0.667-1.5 | |
| Nasal * Host zero | 1.210 | 0.828-1.777 | |
| Sputum * Host zero | 0.819 | 0.546-1.228 | |
| Nasal * Molysis | 1.815 | 1.242-2.658 |
|
| Sputum * Molysis | 0.819 | 0.546-1.228 | |
| Nasal * QIAamp | 1.009 | 0.688-1.475 | |
| Sputum * QIAamp | 1.000 | 0.667-1.5 |
Library failure (stratified)
glm ( sequencing fail ~ treatment + subject_id )
–> Cannot run for Sputum (no failed sample).
For BAL
# BAL only: logistic mixed-effects model of library-prep failure on
# treatment with a random intercept per subject.
# `family = binomial` is required; without it glmer() fits a linear mixed
# model and the coefficients are not log-odds.
# NOTE(review): treatments without any failure lead to separation; check for
# convergence warnings before reporting.
glmer_libfail_bal <- glmer(
  lib_failed ~ treatment + (1 | subject_id),
  data = sample_data %>%
    data.frame() %>%
    dplyr::filter(sample_type %in% c("BAL")),
  family = binomial(link = "logit")
)
cc <- confint(glmer_libfail_bal, parm = "beta_") # can be slow
# Exponentiate so the column labelled "OR" actually holds odds ratios.
exp(cbind(OR = fixef(glmer_libfail_bal), cc))
## OR 2.5 % 97.5 %
## (Intercept) -1.332488e-16 -0.2562488 0.2562489
## treatmentlyPMA 2.000000e-01 -0.1521594 0.5521595
## treatmentBenzonase 1.387779e-16 -0.3521594 0.3521595
## treatmentHost zero 2.000000e-01 -0.1521594 0.5521595
## treatmentMolysis 2.000000e-01 -0.1521594 0.5521595
## treatmentQIAamp 1.332268e-16 -0.3521594 0.3521595
For nasal
# Nasal only: logistic mixed-effects model of library-prep failure on
# treatment with a random intercept per subject.
# `family = binomial` is required; without it glmer() fits a linear mixed
# model and the coefficients are not log-odds.
# NOTE(review): treatments without any failure lead to separation; check for
# convergence warnings before reporting.
glmer_libfail_nasal <- glmer(
  lib_failed ~ treatment + (1 | subject_id),
  data = sample_data %>%
    data.frame() %>%
    dplyr::filter(sample_type %in% c("Nasal")),
  family = binomial(link = "logit")
)
cc <- confint(glmer_libfail_nasal, parm = "beta_") # can be slow
# Exponentiate so the column labelled "OR" actually holds odds ratios.
exp(cbind(OR = fixef(glmer_libfail_nasal), cc))
## OR 2.5 % 97.5 %
## (Intercept) 3.753671e-17 -0.18022611 0.1802261
## treatmentlyPMA 8.000052e-01 0.48783954 1.1121590
## treatmentBenzonase -1.289629e-05 -0.31397681 0.3121580
## treatmentHost zero 3.999871e-01 0.08602319 0.7121580
## treatmentMolysis 7.999948e-01 0.48784093 1.1121605
## treatmentQIAamp 1.289629e-05 -0.31215793 0.3139768
### Library failure (Spt) - glm ( sequencing fail \~ treatment + subject_id ) - **Cannot run test; no failed sample **
Host ratio
Host ratio (all samples)
Non-stratified. p-value of interaction term was 4.587e-16
# Interaction test that motivates the stratified analyses below.
# Linear mixed model of host-read percentage (sequencing_host_prop * 100) on
# sample type, treatment and their interaction, with a random intercept per
# subject; the result is passed to anova().
# `total_read` is the column sum of the count table; samples with a total of
# zero are excluded before fitting.
# NOTE(review): `lmer` is presumably the lmerTest version (needed for ANOVA
# p-values) -- confirm which package masks which.
lmer(sequencing_host_prop * 100 ~ sample_type * treatment + (1|subject_id),
data = sample_data %>%
data.frame %>%
mutate(total_read = phyloseq_unfiltered$phyloseq_count %>%
otu_table %>%
colSums()) %>%
subset(., total_read != 0)) %>%
anova()
Table S1. Host ratio (stratified)
Table S1. Effect size, standard error (SE) and
p-value from a statistical test on host DNA ratio using a linear mixed
effects model with treatment as fixed effect and subject as random effect,
fitted with the R function lmerTest::lmer( Host DNA % ~ treatment + (1|subject_id) ).
Stratified analyses were conducted for each sample type because the
interaction term of sample type and treatment was significant in an
ANOVA test (p-value < 0.001) of the model lmer( Host DNA % ~ sample
type + treatment + sample type * treatment + (1|subject_id)
). The baseline of the categorical variable is untreated, and statistical
significance is noted with *: p-value < 0.05, **: p-value < 0.01 and
***: p-value < 0.001.
Host zero and Molysis were effective for all sample types; QIAamp was effective for nasal swabs and sputum.
# Builds one stratum of Table S1.
#
# Fits lmer(host DNA % ~ treatment + (1 | subject_id)) on the rows of
# `sample_data` whose sample_type is in `sample_types`, then formats the
# fixed-effect table as: "Effect size (95% CI)" (estimate +/- 1.96 * SE, one
# decimal), the p-value rounded to 3 decimals, and a significance-star column.
# Row names are the coefficient names with the "treatment" prefix removed.
#
# The three strata previously used three copy-pasted pipelines, and the nasal
# copy called format(nsmall = ) with an empty argument, so its number
# formatting could differ from BAL and sputum. All strata now share nsmall = 1.
# The BAL copy also computed a `total_read` column that was never used; it has
# been dropped.
hr_effect_table <- function(sample_types) {
  strata <- data.frame(sample_data)
  strata <- strata[strata$sample_type %in% sample_types, , drop = FALSE]
  fit <- lmer(sequencing_host_prop * 100 ~ treatment + (1 | subject_id),
              data = strata)
  summary(fit)$coefficients %>%
    data.frame(check.names = FALSE) %>%
    mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                           abs(`Pr(>|t|)`) < 0.01 ~ "**",
                           abs(`Pr(>|t|)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    rownames_to_column(var = "x") %>%
    mutate(x = gsub("treatment|sample_type", "", x)) %>%
    mutate(x = gsub(":", " * ", x)) %>%
    column_to_rownames(var = "x") %>%
    rename("<i>p</i>-value" = "Pr(>|t|)",
           SE = "Std. Error") %>%
    mutate("Effect size (95% CI)" = paste0(
             format(round(Estimate, 1), nsmall = 1),
             " (",
             format(round(Estimate - 1.96 * SE, 1), nsmall = 1),
             ", ",
             format(round(Estimate + 1.96 * SE, 1), nsmall = 1),
             ")"
           ),
           "<i>p</i>-value" = round(`<i>p</i>-value`, 3)) %>%
    dplyr::select(c("Effect size (95% CI)", "<i>p</i>-value", " "))
}

hr_lmer_bal <- hr_effect_table("BAL")
hr_lmer_ns <- hr_effect_table("Nasal")
hr_lmer_spt <- hr_effect_table("Sputum")
# Table S1: the three per-sample-type coefficient tables side by side, with a
# grouping header row. escape is off so the HTML in the column names renders.
tables1 <- kable_styling(
  add_header_above(
    kbl(cbind(hr_lmer_bal, hr_lmer_ns, hr_lmer_spt),
        format = "html", escape = FALSE),
    c(" " = 1, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)
  ),
  full_width = FALSE,
  html_font = "sans"
)
tables1
| | Effect size (95% CI) | p-value | | Effect size (95% CI) | p-value | | Effect size (95% CI) | p-value | |
|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 99.6 ( 88.7, 110.6) | 0.000 | *** | 95.2 ( 80.1, 110.4) | 0.000 | *** | 99.0 ( 89.1, 108.9) | 0.000 | *** |
| lyPMA | -3.1 (-16.7, 10.4) | 0.657 | | -27.7 (-50.5, -4.9) | 0.026 | * | -3.8 (-16.3, 8.7) | 0.558 | |
| Benzonase | -1.1 (-14.7, 12.4) | 0.870 | | -20.0 (-42.8, 2.9) | 0.099 | | -6.3 (-18.8, 6.3) | 0.339 | |
| Host zero | -18.3 (-31.8, -4.7) | 0.016 | * | -73.6 (-96.4, -50.7) | 0.000 | *** | -45.5 (-58.0, -33.0) | 0.000 | *** |
| Molysis | -17.7 (-31.3, -4.1) | 0.019 | * | -50.6 (-73.4, -27.8) | 0.000 | *** | -69.6 (-82.1, -57.1) | 0.000 | *** |
| QIAamp | -6.3 (-19.8, 7.3) | 0.376 | | -75.4 (-98.3, -52.6) | 0.000 | *** | -18.7 (-31.2, -6.2) | 0.008 | ** |
# Write Table S1 to a stand-alone HTML file in the manuscript figure folder.
save_kable(
  tables1,
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS1.html",
  self_contained = TRUE
)
Final reads
Final reads (all samples)
Non-stratified. The p-value of the interaction term was 3.955e-09.
# ANOVA table of the unstratified final-reads model; the sample_type:treatment
# row is the interaction test quoted in the text. Samples whose unfiltered
# count table sums to zero are excluded before fitting.
# NOTE(review): the response here is Final_reads * 100, whereas the stratified
# models and the Table S2 caption use log10(Final reads) -- confirm which
# response was intended for the interaction test.
anova(
  lmer(
    Final_reads * 100 ~ sample_type * treatment + (1 | subject_id),
    data = subset(
      mutate(
        data.frame(sample_data),
        total_read = colSums(otu_table(phyloseq_unfiltered$phyloseq_count))
      ),
      total_read != 0
    )
  )
)
Table S2 Final reads (stratified)
lmer( log10(Final reads) ~ treatment + (1|subject_id) )
Table S2. Changes in final reads stratified by
sample type, tested with linear mixed-effects models using the R package
lme4: lmer(log10(Final reads) ~ Treatment + (1|Subject id)). Stratified
analyses were conducted for each sample type because the interaction term of
sample type and treatment was significant in an ANOVA test (p-value <
0.001) using the model LMER (log10(Final reads) ~ sample type + treatment
+ sample type * treatment + (1|subject_id) ). Effect sizes with
95% confidence intervals and p-values are listed. The unit of final reads
is reads × 10^6. Statistical significance is noted with
*: p-value < 0.05, **: p-value < 0.01,
and ***: p-value < 0.001.
Sputum’s final reads increased after every treatment. Nasal swabs showed increased reads with Host zero and QIAamp, but decreased reads with lyPMA. BAL also showed increased reads with most treatments.
# Builds one stratum of Table S2.
#
# Fits lmer(log10(Final_reads / 1e6) ~ treatment + (1 | subject_id)) on the
# rows of `sample_data` whose sample_type is in `sample_types`, then formats
# the fixed-effect table as: "Effect size (95% CI)" (estimate +/- 1.96 * SE,
# one decimal), the p-value rounded to 3 decimals, and a significance-star
# column. Row names are the coefficient names with the "treatment" prefix
# removed.
#
# The three strata previously used three copy-pasted pipelines, and the nasal
# copy called format(nsmall = ) with an empty argument, so its number
# formatting could differ from BAL and sputum. All strata now share nsmall = 1.
# The BAL copy also computed a `total_read` column that was never used; it has
# been dropped.
fr_effect_table <- function(sample_types) {
  strata <- data.frame(sample_data)
  strata <- strata[strata$sample_type %in% sample_types, , drop = FALSE]
  fit <- lmer(log10(Final_reads / 1000000) ~ treatment + (1 | subject_id),
              data = strata)
  summary(fit)$coefficients %>%
    data.frame(check.names = FALSE) %>%
    mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                           abs(`Pr(>|t|)`) < 0.01 ~ "**",
                           abs(`Pr(>|t|)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    rownames_to_column(var = "x") %>%
    mutate(x = gsub("treatment|sample_type", "", x)) %>%
    mutate(x = gsub(":", " * ", x)) %>%
    column_to_rownames(var = "x") %>%
    rename("<i>p</i>-value" = "Pr(>|t|)",
           SE = "Std. Error") %>%
    mutate("Effect size (95% CI)" = paste0(
             format(round(Estimate, 1), nsmall = 1),
             " (",
             format(round(Estimate - 1.96 * SE, 1), nsmall = 1),
             ", ",
             format(round(Estimate + 1.96 * SE, 1), nsmall = 1),
             ")"
           ),
           "<i>p</i>-value" = round(`<i>p</i>-value`, 3)) %>%
    dplyr::select(c("Effect size (95% CI)", "<i>p</i>-value", " "))
}

fr_lmer_bal <- fr_effect_table("BAL")
fr_lmer_ns <- fr_effect_table("Nasal")
fr_lmer_spt <- fr_effect_table("Sputum")
# Table S2: the three per-sample-type coefficient tables side by side, with a
# grouping header row. escape is off so the HTML in the column names renders.
tables2 <- kable_styling(
  add_header_above(
    kbl(cbind(fr_lmer_bal, fr_lmer_ns, fr_lmer_spt),
        format = "html", escape = FALSE),
    c(" " = 1, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)
  ),
  full_width = FALSE,
  html_font = "sans"
)
tables2
| | Effect size (95% CI) | p-value | | Effect size (95% CI) | p-value | | Effect size (95% CI) | p-value | |
|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | -0.5 (-1.0, 0.0) | 0.050 | | 0.5 ( 0.2, 0.8) | 0.004 | ** | -0.2 (-0.4, 0.0) | 0.120 | |
| lyPMA | 0.4 (-0.2, 0.9) | 0.247 | | -0.5 (-1.0, -0.1) | 0.040 | * | 0.5 ( 0.3, 0.8) | 0.001 | ** |
| Benzonase | 0.8 ( 0.2, 1.4) | 0.012 | * | 0.1 (-0.3, 0.6) | 0.571 | | 0.8 ( 0.6, 1.1) | 0.000 | *** |
| Host zero | 1.0 ( 0.4, 1.5) | 0.004 | ** | 0.9 ( 0.4, 1.3) | 0.002 | ** | 1.7 ( 1.4, 1.9) | 0.000 | *** |
| Molysis | 1.0 ( 0.5, 1.6) | 0.002 | ** | 0.2 (-0.3, 0.7) | 0.407 | | 2.0 ( 1.7, 2.3) | 0.000 | *** |
| QIAamp | 1.0 ( 0.5, 1.6) | 0.002 | ** | 1.1 ( 0.6, 1.5) | 0.000 | *** | 1.4 ( 1.1, 1.7) | 0.000 | *** |
# Write Table S2 to a stand-alone HTML file in the manuscript figure folder.
save_kable(
  tables2,
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS2.html",
  self_contained = TRUE
)
Figure of sequencing result
Fig. S3. Figure of sequencing result
ii. How were changes in sequencing results?
Fig. S3. Host depletion effects measured by shotgun metagenomic sequencing. (A) Raw DNA reads, (B) host mapped reads by bowtie2, (C) final reads of microbes, and (D) proportion of host mapped among total mapped reads.
# Shared builder for the four panels of Fig. S3.
#
# y         : unquoted expression evaluated in `sample_data` for the y axis
#             (e.g. log10(Raw_reads)).
# y_label   : y-axis title (rendered with element_markdown).
# panel_tag : panel letter shown as the plot tag.
#
# Each panel shows jittered samples plus mean +/- 1 SD per treatment, coloured
# by treatment and faceted by sample type (BAL, Nasal, Sputum only).
# Changes against the four previous copy-pasted panels: the legend label
# "QIAaamp" is corrected to "QIAamp", and facet_wrap() now spells out
# `scales` instead of relying on partial matching of `scale`.
# NOTE(review): `lwd` is kept as written on geom_jitter(); it is not a point
# aesthetic -- confirm whether `size` was intended.
plot_sequencing_metric <- function(y, y_label, panel_tag) {
  ggplot(sample_data %>% subset(., .$sample_type %in% c("BAL", "Nasal", "Sputum")),
         aes(x = sample_type, y = {{ y }})) +
    geom_jitter(aes(col = treatment, x = treatment), lwd = 0.2) +
    stat_summary(aes(color = treatment, x = treatment),
                 fun.data = "mean_sdl", fun.args = list(mult = 1),
                 geom = "pointrange", size = 0.4) +
    theme_classic(base_size = 12, base_family = "sans") +
    # colours from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
    scale_color_manual(values = c("#e31a1c", "#fb9a99", "#33a02c",
                                  "#b2df8a", "#1f78b4", "#a6cee3"),
                       name = "Treatment",
                       labels = c("Control", "lyPMA", "Benzonase",
                                  "Host zero", "Molysis", "QIAamp")) +
    scale_x_discrete(name = "Treatment") +
    theme(axis.title.y = element_markdown(),
          plot.tag = element_text(size = 15),
          axis.text.x = element_blank()) +
    ylab(y_label) +
    labs(tag = panel_tag) +
    facet_wrap(~sample_type, scales = "free_x") +
    guides(fill = guide_legend(nrow = 1))
}

# - Raw_reads
f3a <- plot_sequencing_metric(log10(Raw_reads), "log<sub>10</sub>(raw reads)", "A")
# - Host_mapped
f3b <- plot_sequencing_metric(log10(Host_mapped), "log<sub>10</sub>(host reads)", "B")
# - Final_reads
f3c <- plot_sequencing_metric(log10(Final_reads), "log<sub>10</sub>(final reads)", "C")
# - % Host (we have used Host_mapped/Raw_reads in prior papers)
f3d <- plot_sequencing_metric(sequencing_host_prop, "Host ratio by sequencing", "D")

figS3 <- ggarrange(f3a, f3b, f3c, f3d, common.legend = TRUE, align = "hv")
# Show Fig. S3 in the rendered document.
figS3
# Export Fig. S3 as a PNG: open the device, draw the figure into it, then
# close the device so the file is written.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS3.png", # Output file path
width = 180, # Width of the plot, in the units given below (mm)
height = 170, # Height of the plot, in the units given below (mm)
units = "mm",
res = 600
) #fixing multiple page issue
figS3
# alpha diversity plots
#ggarrange(f4ad, ggarrange(f4e, f4f, ncol = 2),
# ncol = 1) # alpha diversity plots
dev.off()
## quartz_off_screen
## 2
iii. Was host DNA proportion of sequencing similar to that of qPCR? (secondary analysis)
Fig. S4. Host ratio qPCR vs sequencing
Peggy’s comment: metagenomics is gold standard for % host, but most people don’t have the money to do deep sequencing. So secondary analysis is to calculate correlation between %host by qPCR vs %host by metagenomics (this can be a supplementary figure but would at least mention correlation in text)
Fig. S4. (A) Correlation plot and (B) Bland-Altman plot between host DNA proportion measured with qPCR and shotgun metagenomic sequencing.
# Fig. S4A: host DNA proportion by sequencing (y) against qPCR (x), coloured
# by sample type, with a linear fit (95% band) and the R-squared of
# lm(sequencing_host_prop ~ host_proportion) annotated on the panel.
figS4a <- ggplot(
  sample_data %>% subset(., .$sample_type %in% c("BAL", "Nasal", "Sputum")),
  aes(x = host_proportion, y = sequencing_host_prop, col = sample_type)
) +
  geom_point() +
  theme_classic(base_size = 12, base_family = "sans") +
  # colours from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_color_manual(values = c("#e31a1c", "#33a02c", "#1f78b4")) +
  #scale_x_discrete(name ="Sample type")+
  theme(axis.title.y = element_markdown(),
        plot.tag = element_text(size = 15),
        legend.position = "top") +
  labs(x = "% Host DNA (qPCR)",
       y = "% Host DNA (mNGS)",
       col = "Sample type",
       tag = "A") +
  annotate(geom = "richtext",
           family = "sans",
           x = 0.5, y = 1,
           label = paste0(
             "R<sup>2</sup> = ",
             format(
               round(
                 summary(
                   lm(sequencing_host_prop ~ host_proportion,
                      data = sample_data %>%
                        data.frame %>%
                        subset(., .$sample_type %in% c("BAL", "Nasal", "Sputum")))
                 )$r.squared,
                 2
               ),
               nsmall = 2
             )
           )) +
  geom_smooth(method = lm, color = "red", se = TRUE, level = 0.95) +
  guides(fill = guide_legend(nrow = 1))
# Bland-Altman inputs: per-sample mean of the two host-proportion measurements
# and their difference (sequencing minus qPCR), for BAL, Nasal and Sputum.
bland_altman_data <- mutate(
  data.frame(
    subset(sample_data, sample_data$sample_type %in% c("BAL", "Nasal", "Sputum"))
  ),
  avg_two = (host_proportion + sequencing_host_prop) / 2,
  diff_two = sequencing_host_prop - host_proportion
)

# Fig. S4B: difference against mean, with the mean difference (solid line)
# and mean +/- 1.96 SD (dashed lines).
figS4b <- ggplot(bland_altman_data,
                 aes(x = avg_two, y = diff_two, col = sample_type)) +
  geom_point() +
  theme_classic(base_size = 12, base_family = "sans") +
  # colours from https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_color_manual(values = c("#e31a1c", "#33a02c", "#1f78b4")) +
  #scale_x_discrete(name ="Sample type")+
  theme(axis.title.y = element_markdown(),
        plot.tag = element_text(size = 15)) +
  labs(x = "% Host DNA (mean)",
       y = "Difference between qPCR and mNGS") +
  geom_hline(yintercept = mean(bland_altman_data$diff_two),
             colour = "black", size = 0.5) +
  geom_hline(yintercept = mean(bland_altman_data$diff_two) +
               c(-1.96, 1.96) * sd(bland_altman_data$diff_two),
             colour = "black", size = 0.5, linetype = "dashed") +
  labs(tag = "B")

# Stack panels A and B with a shared legend and show the result.
figS4 <- ggarrange(figS4a, figS4b, common.legend = TRUE, ncol = 1, align = "hv")
figS4
# Export Fig. S4 as a PNG: open the device, draw the figure into it, then
# close the device so the file is written.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS4.png", # Output file path
width = 180, # Width of the plot, in the units given below (mm)
height =180, # Height of the plot, in the units given below (mm)
units = "mm",
res = 600
) #fixing multiple page issue
figS4
# alpha diversity plots
#ggarrange(f4ad, ggarrange(f4e, f4f, ncol = 2),
# ncol = 1) # alpha diversity plots
dev.off()
## quartz_off_screen
## 2
3.2. Quality control of sequencing data
iv. Were there any contaminants in the sequencing result? (Do these host depletion methods introduce contamination)
decontam
Table S9. decontam - stratified by sample_type
Table S9. Summary table of potential contaminants and their prevalence across all samples (N = 113) and within negative controls (total N = 31). The contaminants were identified using decontam37 combined method (Fisher’s exact test result of prevalence and frequency method results), using 16S qPCR bacterial DNA concentration as total bacterial load at prevalence threshold of 0.1. The analyses were stratified by sample types.
#Stratified by sample type
# Prevalence of each taxon = number of samples with a non-zero count, computed
# within the negative controls and across all samples of the unfiltered table.
prev_neg <- data.frame(
  "Prevalence (negative controls)" = rowSums(
    data.frame(
      otu_table(
        subset_samples(phyloseq_unfiltered$phyloseq_count, sample_type == "Neg.")
      )
    ) != 0
  ),
  check.names = FALSE
)

prev_all <- data.frame(
  "Prevalence (all)" = rowSums(
    data.frame(otu_table(phyloseq_unfiltered$phyloseq_count)) != 0
  ),
  check.names = FALSE
)

# Flag negative controls (sample_type containing "Neg") for decontam.
sample_data(phyloseq_unfiltered$phyloseq_rel)$is.neg <-
  grepl("Neg", sample_data(phyloseq_unfiltered$phyloseq_rel)$sample_type)

# One phyloseq object per sample type: samples with S.obs != 0, restricted to
# the negative controls plus that sample type.
phyloseq_decontam_bal <- subset_samples(
  subset_samples(phyloseq_unfiltered$phyloseq_rel, S.obs != 0),
  sample_type == "Neg." | sample_type == "BAL"
)

phyloseq_decontam_ns <- subset_samples(
  subset_samples(phyloseq_unfiltered$phyloseq_rel, S.obs != 0),
  sample_type == "Neg." | sample_type == "Nasal"
)

phyloseq_decontam_spt <- subset_samples(
  subset_samples(phyloseq_unfiltered$phyloseq_rel, S.obs != 0),
  sample_type == "Neg." | sample_type == "Sputum"
)
# Runs decontam's combined method (threshold 0.1, negatives from `is.neg`,
# concentration from `DNA_bac_ng_uL`) on one sample-type subset and returns a
# two-column data frame: the sample-type label and the taxa flagged as
# contaminants.
#
# The previous data.frame("<label>", <taxa>) form failed when no taxon was
# flagged (a length-1 label against a length-0 vector). Repeating the label to
# the length of the taxa vector yields a valid zero-row frame in that case.
list_contaminants <- function(physeq, label) {
  calls <- isContaminant(physeq, method = "combined", neg = "is.neg",
                         threshold = 0.1, conc = "DNA_bac_ng_uL")
  # which() drops NA flags, as subset() did before.
  flagged <- row.names(calls)[which(calls$contaminant)]
  data.frame("Sample type" = rep(label, length(flagged)),
             "Taxa" = flagged,
             check.names = FALSE)
}

contaminant_combined_bal <- list_contaminants(phyloseq_decontam_bal, "BAL")

contaminant_combined_ns <- list_contaminants(phyloseq_decontam_ns, "Nasal swab")

contaminant_combined_spt <- list_contaminants(phyloseq_decontam_spt, "Sputum")

# Stack the per-sample-type results into one long table.
contaminants <- rbind(contaminant_combined_bal, contaminant_combined_ns, contaminant_combined_spt)
names(contaminants) <- c("Sample type", "Taxa")
# Attach overall and negative-control prevalence to each flagged taxon, keep
# the four report columns, sort by sample type then taxon, and reset row names.
merged_contaminants <- contaminants %>%
  merge(rownames_to_column(prev_all, "Taxa"), by = "Taxa") %>%
  merge(rownames_to_column(prev_neg, "Taxa"), by = "Taxa") %>%
  dplyr::select(c("Sample type", "Taxa", "Prevalence (all)", "Prevalence (negative controls)")) %>%
  {
    .[order(.[["Sample type"]], .[["Taxa"]]), ]
  } %>%
  remove_rownames()
# Formats taxon names as HTML with the Latin name in italics.
#
# data: character vector of taxon names (underscores as word separators).
# Returns a character vector of the same length in which
#   * underscores become spaces, square brackets and " group" are removed;
#   * a whole-word "sp" (with or without a trailing period) is written "sp."
#     and left outside the italics, together with anything after it,
#     e.g. "Leptotrichia_sp_oral_taxon_212" ->
#          "<em>Leptotrichia</em> sp. oral taxon 212";
#   * every other name is wrapped entirely in <em>...</em>.
#
# Fixes over the previous version: " sp" was matched anywhere, so epithets
# such as "sphaericus" were mangled; the "." in " sp." was an unescaped regex
# wildcard; and the "[*]" check (a leftover from a Markdown variant) never
# matched, so every "sp." name ended with a stray closing </em>.
species_italic2 <- function(data) {
  data <- gsub("_", " ", data, fixed = TRUE)
  data <- gsub("[]]|[[]", "", data)
  data <- gsub(" group", "", data, fixed = TRUE)
  # "sp" as a whole word, optionally followed by its period
  sp_pattern <- " sp\\b\\.?"
  has_sp <- grepl(sp_pattern, data, perl = TRUE)
  # Close the italics before the first "sp." only, so the markup stays balanced
  data <- sub(sp_pattern, "</em> sp.", data, perl = TRUE)
  ifelse(has_sp,
         paste0("<em>", data),
         paste0("<em>", data, "</em>"))
}
Decontam summary
Summary table of potential contaminants with all sample types and stratified sample types in all methods (prevalence, frequency, and combined)
# Contaminant summary table with taxon names italicised; escape is off so the
# <em> markup renders.
tableS3 <- kable_styling(
  kbl(
    mutate(merged_contaminants, Taxa = species_italic2(Taxa)),
    format = "html",
    escape = FALSE
  ),
  full_width = FALSE,
  html_font = "sans"
)
tableS3
| Sample type | Taxa | Prevalence (all) | Prevalence (negative controls) |
|---|---|---|---|
| BAL | Cupriavidus sp. | 113 | 25 |
| BAL | Cutibacterium acnes | 90 | 31 |
| BAL | Sutterella parvirubra | 61 | 1 |
| Nasal swab | Cupriavidus sp. | 113 | 25 |
| Nasal swab | Sutterella parvirubra | 61 | 1 |
| Sputum | Collinsella intestinalis | 30 | 0 |
| Sputum | Cupriavidus sp. | 113 | 25 |
| Sputum | Cutibacterium acnes | 90 | 31 |
| Sputum | Leptotrichia sp. oral taxon 212 | 4 | 0 |
| Sputum | Rothia aeria | 17 | 0 |
| Sputum | Streptococcus infantis | 27 | 0 |
# Write the contaminant table (reported as Table S9) to a stand-alone HTML file.
save_kable(
  tableS3,
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS9.html",
  self_contained = TRUE
)
Filtering taxa
Prevalence filtering
Prevalence & abundance filtering was conducted
M&M - Prevalence filtering
1. Prevalence filtration at 5%, except its abundance is over 0.75 quantile. a. Information in the main text
# Rescale every sample of the unfiltered relative-abundance table to sum to 1.
phyloseq_unfiltered$phyloseq_rel <- phyloseq_unfiltered$phyloseq_rel %>%
  transform_sample_counts(function(x) x / sum(x))
# Per-taxon QC summary over samples with S.obs != 0 from the Mock, BAL, Nasal
# and Sputum groups: taxon name, prevalence (number of samples with abundance
# > 0) and mean relative abundance. The subset is computed once inside
# local() so no helper variable is left in the workspace.
taxa_qc <- local({
  qc_otu <- phyloseq_unfiltered$phyloseq_rel %>%
    subset_samples(S.obs != 0 &
                     sample_type %in% c("Mock", "BAL", "Nasal", "Sputum")) %>%
    otu_table()
  data.frame(
    "species" = colnames(t(qc_otu)),
    "prevalence" = colSums(t(ifelse(qc_otu > 0, 1, 0))), # prevalence of taxa
    "mean_rel_abd" = colMeans(t(qc_otu), na.rm = T) # mean relative abundance
  )
})
# Per-function QC summary over samples with S.obs != 0 from the Mock, BAL,
# Nasal and Sputum groups of the pathway RPK table: function name, prevalence
# (number of samples with RPK > 0), mean RPK, and a 0/1 flag for the
# "UNMAPPED" / "UNINTEGRATED" rows. data.frame() turns the reserved name
# "function" into the column `function.`.
function_qc <- local({
  path_otu <- phyloseq_unfiltered$phyloseq_path_rpk %>%
    subset_samples(S.obs != 0 &
                     sample_type %in% c("Mock", "BAL", "Nasal", "Sputum")) %>%
    otu_table()
  data.frame(
    "function" = colnames(t(path_otu)),
    "prevalence" = colSums(t(ifelse(path_otu > 0, 1, 0))), # prevalence of functions
    "mean_rpk" = colMeans(t(path_otu), na.rm = T), # mean RPK
    unidentified = ifelse(
      row.names(path_otu > 0) %in% c("UNMAPPED", "UNINTEGRATED"),
      1,
      0
    )
  )
})
# Flags taxa for removal.
#   red_flag_prev     : 1 when prevalence is below 5% of the QC samples.
#   red_flag_prev_abd : 1 when prevalence is below 5% AND mean relative
#                       abundance is below the 0.75 quantile of taxa with
#                       non-zero prevalence.
#   red_flag_decontam : TRUE when the taxon was flagged by decontam.
red_flag_taxa <- local({
  # Number of QC samples (S.obs != 0; Mock, BAL, Nasal, Sputum)
  n_samples <- phyloseq_unfiltered$phyloseq_rel %>%
    subset_samples(S.obs != 0 &
                     sample_type %in% c("Mock", "BAL", "Nasal", "Sputum")) %>%
    otu_table() %>%
    t() %>%
    rownames() %>%
    length()
  low_prevalence <- taxa_qc$prevalence < n_samples * 0.05
  # Abundance cut-off ignores taxa with zero prevalence (taxa from nasal swabs)
  abundance_cutoff <- quantile(
    subset(taxa_qc, taxa_qc$prevalence != 0)$mean_rel_abd,
    0.75
  )
  data.frame(
    species = taxa_qc$species,
    prevalence = taxa_qc$prevalence,
    mean_rel_abd = taxa_qc$mean_rel_abd,
    red_flag_prev_abd = ifelse(
      low_prevalence & taxa_qc$mean_rel_abd < abundance_cutoff, 1, 0
    ),
    red_flag_prev = ifelse(low_prevalence, 1, 0)
  ) %>%
    mutate(red_flag_decontam = species %in% (contaminants$Taxa %>% unique()))
})

# Taxa with low prevalence that are kept because of their abundance
subset(red_flag_taxa, red_flag_prev == 1 & red_flag_prev_abd == 0)
# Flags functions for removal, using the same rule as for taxa: prevalence
# below 5% of the QC samples and mean RPK below the 0.75 quantile of functions
# with non-zero prevalence. The UNMAPPED and UNINTEGRATED rows are always
# flagged so that they are removed.
red_flag_function <- local({
  # Number of QC samples (S.obs != 0; Mock, BAL, Nasal, Sputum)
  n_samples <- phyloseq_unfiltered$phyloseq_path_rpk %>%
    subset_samples(S.obs != 0 &
                     sample_type %in% c("Mock", "BAL", "Nasal", "Sputum")) %>%
    otu_table() %>%
    t() %>%
    rownames() %>%
    length()
  low_prevalence <- function_qc$prevalence < n_samples * 0.05
  # Cut-off ignores functions with zero prevalence
  rpk_cutoff <- quantile(
    subset(function_qc, function_qc$prevalence != 0)$mean_rpk,
    0.75
  )
  data.frame(
    function. = function_qc$function.,
    prevalence = function_qc$prevalence,
    mean_rel_abd = function_qc$mean_rpk,
    red_flag_prev_abd = ifelse(
      low_prevalence & function_qc$mean_rpk < rpk_cutoff, 1, 0
    ),
    red_flag_prev = ifelse(low_prevalence, 1, 0)
  ) %>%
    mutate(red_flag_prev_abd = case_when(
      function. %in% c("UNMAPPED", "UNINTEGRATED") ~ 1,
      .default = red_flag_prev_abd
    ))
})

# Functions with low prevalence that are kept because of their abundance
subset(red_flag_function, red_flag_prev == 1 & red_flag_prev_abd == 0)
#decontaminated phyloseq
# Decontaminated copy: built from `phyloseq` BEFORE it is pruned below, and
# keeps only taxa that pass the prevalence/abundance filter and were not
# flagged by decontam.
phyloseq_decontam <- phyloseq

phyloseq_decontam$phyloseq_count <- red_flag_taxa %>%
  subset(red_flag_prev_abd != 1 & !red_flag_decontam) %>%
  .$species %>%
  prune_taxa(phyloseq$phyloseq_count)

phyloseq_decontam$phyloseq_rel <- red_flag_taxa %>%
  subset(red_flag_prev_abd != 1 & !red_flag_decontam) %>%
  .$species %>%
  prune_taxa(phyloseq$phyloseq_rel) %>%
  transform_sample_counts(function(x) x / sum(x))

# phyloseq for analysis: prevalence/abundance filter only; relative
# abundances are rescaled to sum to 1 after pruning.
phyloseq$phyloseq_count <- red_flag_taxa %>%
  subset(red_flag_prev_abd != 1) %>%
  .$species %>%
  prune_taxa(phyloseq$phyloseq_count)

phyloseq$phyloseq_rel <- red_flag_taxa %>%
  subset(red_flag_prev_abd != 1) %>%
  .$species %>%
  prune_taxa(phyloseq$phyloseq_rel) %>%
  transform_sample_counts(function(x) x / sum(x))

phyloseq$phyloseq_path_rpk <- red_flag_function %>%
  subset(red_flag_prev_abd != 1) %>%
  .$function. %>%
  prune_taxa(phyloseq$phyloseq_path_rpk)

#phyloseq$tree_phyloseq_count <- prune_taxa(subset(red_flag_taxa,
#red_flag_taxa$red_flag_prev_abd != 1 & !red_flag_taxa$red_flag_decontam_prev)$species,
#phyloseq$tree_phyloseq_count)
#phyloseq$tree_phyloseq_rel <- prune_taxa(subset(red_flag_taxa,
#red_flag_taxa$red_flag_prev_abd != 1 & !red_flag_taxa$red_flag_decontam_prev)$species,
#phyloseq$tree_phyloseq_rel)
Extras
Alpha diversity calculation
#Calculation of alpha diversity indices for filtered samples
# Computes alpha-diversity indices for a phyloseq object and merges them into
# its sample data.
#
# data: a phyloseq object; the code transposes the OTU table before calling
#       vegan, so it presumably expects taxa as rows -- TODO confirm.
# Returns (invisibly, because the last expression is an assignment) a data
# frame of the sample data joined by row name with the columns S.obs,
# data_shannon, data_hill, data_invsimpson, data_evenness and the columns
# returned by microbiome::dominance(). all = T keeps rows that are present on
# only one side of the merge.
alpha_diversity <- function(data) {
# NOTE(review): `.[colSums(.) != 0]` uses a single index (no comma). It looks
# intended to drop samples whose counts sum to zero -- confirm how `[` on an
# otu_table treats a single logical index before relying on this.
otu_table <- otu_table(data) %>% .[colSums(.) !=0]
# Observed richness: number of non-zero entries per row of the transposed table
S.obs <- rowSums(t(otu_table) != 0)
sample_data <- sample_data(data)
data_evenness <- vegan::diversity(t(otu_table)) / log(vegan::specnumber(t(otu_table))) # evenness: Shannon index divided by log(number of species)
data_shannon <- vegan::diversity(t(otu_table), index = "shannon") # calculate Shannon index using vegan package
data_hill <- exp(data_shannon) # exponential of the Shannon index
data_dominance <- microbiome::dominance(otu_table, index = "all", rank = 1, aggregate = TRUE) # dominance (Berger-Parker index), etc.
data_invsimpson <- vegan::diversity(t(otu_table), index = "invsimpson") # calculate inverse Simpson index using vegan package
alpha_diversity <- cbind(S.obs, data_shannon, data_hill, data_invsimpson, data_evenness,data_dominance) # combine all indices in one data table
# by = 0 merges on row names; the merged "Row.names" column is moved back to row names
sample_data <- merge(data.frame(sample_data), alpha_diversity, by = 0, all = T) %>% column_to_rownames(var = "Row.names")
}
# Attach the alpha-diversity columns to the sample metadata of every object.
# Order matters: each `*_rel` object receives the indices computed from the
# matching `*_count` object, and this is done BEFORE the `*_count` object's own
# sample data is overwritten, so alpha_diversity() always reads the
# not-yet-extended metadata of the count object.
#sample_data(phyloseq$phyloseq_count) <- sample_data(alpha_diversity(phyloseq$phyloseq_count))
sample_data(phyloseq$phyloseq_rel) <- sample_data(alpha_diversity(phyloseq$phyloseq_count))
sample_data(phyloseq$phyloseq_count) <- sample_data(alpha_diversity(phyloseq$phyloseq_count))
# Functional (pathway) table: here `S.obs` counts non-zero functions.
sample_data(phyloseq$phyloseq_path_rpk) <- sample_data(alpha_diversity(phyloseq$phyloseq_path_rpk))
sample_data(phyloseq_tv) <- sample_data(alpha_diversity(phyloseq_tv))
sample_data(phyloseq_decontam$phyloseq_rel) <- sample_data(alpha_diversity(phyloseq_decontam$phyloseq_count))
sample_data(phyloseq_decontam$phyloseq_count) <- sample_data(alpha_diversity(phyloseq_decontam$phyloseq_count))
# NOTE: this variable shares its name with phyloseq's sample_data() function;
# later chunks read it as a plain object.
sample_data <- sample_data(phyloseq$phyloseq_count)
Table 1 (revised)
Revised Table 1 was constructed after prevalence and abundance filtering.
Table 1. Sequencing results stratified by sample type and host depletion treatment. Number of samples in each experimental group, human and bacterial DNA measured by qPCR, number of samples that did not pass library QC or had no microbial mapped reads, QC'd reads, % host mapped reads, final reads, and species and function richness. Richness was calculated after applying prevalence and abundance filtering. Values are shown as N (%) or median (interquartile range).
# Flag samples without any observed species (S.obs == 0) as sequencing
# failures: 1 = failed, 0 = not failed.
sample_data(phyloseq$phyloseq_count)$sequencing_fail <- ifelse(
  sample_data(phyloseq$phyloseq_count)$S.obs == 0,
  1,
  0
)

# Add function richness: the `S.obs` column of the pathway object is renamed
# to `F.obs` and joined to the count object's metadata by row names (by = 0).
# merge() stores the keys in "Row.names", which becomes the row names again.
sample_data(phyloseq$phyloseq_count) <- column_to_rownames(
  merge(
    data.frame(sample_data(phyloseq$phyloseq_count), check.names = FALSE),
    rename(
      dplyr::select(
        data.frame(sample_data(phyloseq$phyloseq_path_rpk),
                   check.names = FALSE),
        c("S.obs")
      ),
      F.obs = "S.obs"
    ),
    by = 0
  ),
  "Row.names"
)
# Format a numeric vector as "median (Q1, Q3)" for Table 1.
#   x      numeric vector (one group of samples)
#   digits decimals used for rounding and as `nsmall`; thousands are separated
#          with ","; NULL pastes the raw numbers without any formatting
#   na.rm  passed on to median() and quantile()
# Each number is formatted on its own so the three values are never padded to
# a common width.
format_median_iqr <- function(x, digits = 1, na.rm = FALSE) {
  stats <- c(median(x, na.rm = na.rm),
             quantile(x, c(0.25, 0.75), na.rm = na.rm, names = FALSE))
  if (!is.null(digits)) {
    stats <- vapply(
      stats,
      function(v) format(round(v, digits), nsmall = digits, big.mark = ","),
      character(1)
    )
  }
  paste0(stats[1], " (", stats[2], ", ", stats[3], ")")
}

# Table 1: one row per sample type x treatment (Sputum, Nasal and BAL only),
# rendered as an HTML kable. Column names contain HTML (<br>, <sup>) and are
# therefore emitted with escape = F.
table1 <- sample_data(phyloseq$phyloseq_count) %>%
  data.frame() %>%
  dplyr::filter(sample_type %in% c("Sputum", "Nasal", "BAL")) %>%
  group_by(sample_type, treatment) %>%
  summarise(
    `N` = n(),
    # `Total DNA <br>ng/µL`: median (IQR) of picogreen_ng_ul with 2 decimals
    # was reported here in an earlier version (disabled).
    # qPCR concentrations are stored in ng/uL; x 1000 converts to pg/uL.
    `Human DNA <br>pg/µL` = format_median_iqr(DNA_host_ng_uL * 1000),
    `Bacterial DNA <br>pg/µL` = format_median_iqr(DNA_bac_ng_uL * 1000),
    # NOTE(review): the two flags are added, so a sample flagged by both
    # `lib_failed` and `sequencing_fail` is counted twice - confirm that the
    # flags are mutually exclusive.
    # The percentage is rounded so group sizes that do not divide evenly do
    # not print a long decimal tail.
    `Sequencing fail<br>N (%)` = paste0(
      sum(lib_failed + sequencing_fail),
      " (",
      round(sum(lib_failed + sequencing_fail) / n() * 100, 1),
      " %)"
    ),
    `QC'd reads<br>reads x 10<sup>6</sup>` =
      format_median_iqr(Reads_after_trim / 1000000),
    `Host reads<br>%` = format_median_iqr(sequencing_host_prop * 100),
    `Final reads<br>reads x 10<sup>6</sup>` =
      format_median_iqr(Final_reads / 1000000, na.rm = TRUE),
    # Richness is reported as raw numbers (no rounding / padding).
    `Species<br>richness` = format_median_iqr(S.obs, digits = NULL,
                                              na.rm = TRUE),
    `Function<br>richness` = format_median_iqr(F.obs, digits = NULL,
                                               na.rm = TRUE)
  ) %>%
  data.frame(check.names = FALSE) %>%
  arrange(sample_type, treatment) %>%
  rename(`Sample` = sample_type, Treatment = treatment) %>%
  mutate_all(linebreak) %>%
  kbl(format = "html", escape = F) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
table1
| Sample | Treatment | N |
Human DNA pg/µL |
Bacterial DNA pg/µL |
Sequencing fail N (%) |
QC’d reads reads x 106 |
Host reads % |
Final reads reads x 106 |
Species richness |
Function richness |
|---|---|---|---|---|---|---|---|---|---|---|
| BAL | Untreated | 5 | 1,512.8 (1,237.9, 9,836.4) | 12.6 (10.5, 37.8) | 1 (20 %) | 129.5 (52.5, 129.9) | 99.7 (99.6, 99.7) | 0.3 (0.3, 0.4) | 3 (1, 3) | 6 (0, 8) |
| BAL | lyPMA | 5 | 2,139.7 (60.4, 6,255.6) | 8.4 (0.3, 17.4) | 2 (40 %) | 46.7 (28.6, 110.0) | 99.1 (97.8, 99.5) | 0.6 (0.4, 1.0) | 7 (3, 7) | 64 (14, 97) |
| BAL | Benzonase | 5 | 59.6 (47.8, 70.1) | 0.9 (0.7, 2.3) | 0 (0 %) | 149.3 (129.7, 183.7) | 98.8 (98.7, 98.9) | 1.7 (1.6, 2.2) | 6 (5, 7) | 152 (107, 163) |
| BAL | Host zero | 5 | 6.8 (2.3, 7.5) | 0.4 (0.3, 1.1) | 1 (20 %) | 31.9 (18.4, 35.1) | 83.7 (76.8, 87.2) | 2.4 (1.3, 8.2) | 8 (7, 11) | 210 (119, 219) |
| BAL | Molysis | 5 | 7.6 (6.6, 25.2) | 2.0 (0.3, 4.6) | 1 (20 %) | 39.0 (29.0, 39.3) | 92.5 (92.5, 93.6) | 2.9 (1.3, 15.6) | 17 (7, 43) | 216 (212, 245) |
| BAL | QIAamp | 5 | 33.1 (32.0, 79.5) | 0.5 (0.3, 1.9) | 0 (0 %) | 132.4 (119.6, 137.5) | 98.3 (92.3, 98.6) | 2.6 (1.0, 10.2) | 8 (6, 14) | 215 (30, 217) |
| Nasal | Untreated | 10 | 340.2 (202.3, 685.8) | 22.9 (16.9, 26.6) | 0 (0 %) | 106.2 (63.7, 138.7) | 94.1 (92.8, 97.9) | 4.8 (1.0, 8.7) | 11 (8.5, 12) | 138.5 (115.75, 152) |
| Nasal | lyPMA | 5 | 2.6 (0.8, 9.2) | 0.3 (0.3, 0.3) | 4 (80 %) | 7.9 (6.9, 9.7) | 91.2 (35.6, 91.6) | 0.7 (0.6, 0.8) | 5 (5, 7) | 136 (123, 168) |
| Nasal | Benzonase | 5 | 12.8 (1.9, 78.8) | 6.1 (5.4, 10.2) | 0 (0 %) | 47.1 (41.7, 53.2) | 78.7 (77.8, 94.8) | 2.8 (2.6, 10.4) | 8 (7, 14) | 185 (124, 187) |
| Nasal | Host zero | 5 | 0.5 (0.1, 0.7) | 7.6 (3.3, 15.8) | 2 (40 %) | 24.5 (11.7, 55.2) | 8.9 (2.7, 30.4) | 24.3 (9.7, 50.3) | 20 (19, 20) | 208 (195, 210) |
| Nasal | Molysis | 5 | 0.4 (0.0, 0.8) | 1.6 (1.1, 5.8) | 4 (80 %) | 8.1 (5.0, 34.9) | 49.9 (5.0, 78.4) | 3.2 (1.7, 25.3) | 12 (11, 20) | 193 (183, 200) |
| Nasal | QIAamp | 5 | 2.1 (0.9, 7.1) | 28.8 (24.7, 30.7) | 0 (0 %) | 56.2 (54.9, 58.5) | 20.1 (15.7, 23.2) | 46.3 (45.0, 46.7) | 17 (16, 20) | 206 (188, 210) |
| Sputum | Untreated | 5 | 39,231.5 (19,448.0, 59,430.9) | 245.3 (220.3, 311.0) | 0 (0 %) | 69.2 (68.0, 75.6) | 99.2 (98.9, 99.2) | 0.6 (0.6, 0.9) | 13 (10, 15) | 149 (143, 150) |
| Sputum | lyPMA | 5 | 9,779.5 (994.2, 11,437.5) | 97.8 (25.9, 100.9) | 0 (0 %) | 89.7 (42.0, 105.2) | 96.4 (92.5, 98.3) | 2.5 (1.5, 4.4) | 49 (42, 64) | 251 (241, 257) |
| Sputum | Benzonase | 5 | 154.3 (141.3, 349.0) | 33.2 (21.3, 53.0) | 0 (0 %) | 84.0 (82.0, 87.1) | 94.2 (92.9, 94.5) | 4.7 (4.5, 5.9) | 93 (83, 94) | 270 (263, 302) |
| Sputum | Host zero | 5 | 49.4 (11.7, 57.7) | 38.9 (33.6, 39.0) | 0 (0 %) | 106.2 (61.6, 114.8) | 61.7 (37.5, 68.0) | 29.1 (23.6, 36.7) | 130 (111, 132) | 307 (292, 320) |
| Sputum | Molysis | 5 | 13.6 (8.0, 26.3) | 28.4 (24.1, 30.4) | 0 (0 %) | 105.6 (90.8, 115.7) | 32.8 (17.0, 33.8) | 61.1 (55.6, 83.7) | 125 (122, 133) | 309 (305, 313) |
| Sputum | QIAamp | 5 | 241.6 (196.3, 273.5) | 64.3 (34.6, 71.0) | 0 (0 %) | 102.4 (100.9, 106.0) | 88.2 (68.9, 88.6) | 11.6 (11.3, 38.9) | 102 (89, 113) | 276 (270, 277) |
# Write the rendered Table 1 as a self-contained HTML file.
save_kable(
  table1,
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/table1.html",
  self_contained = TRUE
)
+ α Does Amy’s analysis result show changes in taxa?
This code takes a while to run and is about 1,800 lines long, so it is not included in this manuscript (it will be submitted separately).
Calculations used for main text
Figure 1 numbers
These tables were generated to report the median (IQR) of the qPCR results.
Figure 1 numbers (Host DNA) - treated vs untreated
# Median and quartiles of host + bacterial DNA (`DNA_host_nondil` +
# `DNA_bac_nondil`; presumably the undiluted concentrations - confirm) per
# sample type and treated/untreated group, for samples with at least one
# observed species.
# Fix: the lower quartile used `DNA_host_ng_uL + DNA_host_ng_uL`, i.e. a
# different variable added to itself; all three statistics now summarise the
# same quantity.
sample_data %>%
  subset(., .$S.obs != 0) %>%
  group_by(sample_type, treated) %>%
  summarise(med = median(DNA_host_nondil + DNA_bac_nondil),
            lo = quantile(DNA_host_nondil + DNA_bac_nondil, 0.25),
            high = quantile(DNA_host_nondil + DNA_bac_nondil, 0.75))
# Host DNA (`DNA_host_ng_uL`): median and quartiles per sample type and
# treated/untreated group, restricted to samples with S.obs != 0.
sample_data %>%
  subset(., .$S.obs != 0) %>%
  group_by(sample_type, treated) %>%
  summarise(
    med = median(DNA_host_ng_uL),
    lo = quantile(DNA_host_ng_uL, 0.25),
    high = quantile(DNA_host_ng_uL, 0.75)
  )
Figure 1 numbers (Host proportion) - treated vs untreated
# Host proportion (`host_proportion`): median and quartiles per sample type
# and treated/untreated group, restricted to samples with S.obs != 0.
sample_data %>%
  subset(., .$S.obs != 0) %>%
  group_by(sample_type, treated) %>%
  summarise(
    med = median(host_proportion),
    lo = quantile(host_proportion, 0.25),
    high = quantile(host_proportion, 0.75)
  )
Figure 1 numbers (bacterial DNA) - treated vs untreated
# Bacterial DNA (`DNA_bac_ng_uL`): median and quartiles per sample type and
# treated/untreated group, restricted to samples with S.obs != 0.
sample_data %>%
  subset(., .$S.obs != 0) %>%
  group_by(sample_type, treated) %>%
  summarise(
    med = median(DNA_bac_ng_uL),
    lo = quantile(DNA_bac_ng_uL, 0.25),
    high = quantile(DNA_bac_ng_uL, 0.75)
  )
3.3. Effects of treatments on taxonomic composition
*Did taxonomic composition change?
Fig. 2 Overview of sequencing results
- PSL comments: I would move this to the supplement and replace with a figure that demonstrates the relative abundance of the 10 most abundant species (stratified by sample type) and 10 most abundant KEGG functions. The whole point is to highlight why you would want to do metagenomics rather than amplicon sequencing. Consider using the following qualitative color palette from colorbrewer:
Fig. 2. Relative abundances at species level by sample type after employing prevalence and abundance filtering and top 10 species in each sample type by mean abundance. (A) BAL, (B) nasal swabs, (C) sputum, and (D) mock community. Empty space indicates samples showed no microbial reads, i.e., sequencing failed samples.
# Stacked bar plot of a phyloseq object.
#   physeq      phyloseq object; melted to long format with psmelt()
#   x, y, fill  column names (strings) of the melted table mapped to the
#               x axis, y axis and fill colour
#   title       optional plot title
#   facet_grid  optional facetting specification handed to facet_grid()
# Returns the ggplot object. x labels are rotated by -90 degrees and unused
# levels of a discrete x are kept (drop = FALSE).
my_plot_bar <- function(physeq, x = "Sample", y = "Abundance", fill = NULL,
                        title = NULL, facet_grid = NULL) {
  melted <- psmelt(physeq)
  plt <- ggplot(melted, aes_string(x = x, y = y, fill = fill)) +
    geom_bar(stat = "identity") +
    theme(axis.text.x = element_text(angle = -90, hjust = 0)) +
    scale_x_discrete(drop = FALSE)
  if (!is.null(facet_grid)) {
    # The call resolves to the facet_grid() function; the argument of the
    # same name is only its specification.
    plt <- plt + facet_grid(facet_grid)
  }
  if (!is.null(title)) {
    plt <- plt + ggtitle(title)
  }
  plt
}
# Fig. 2D - species-level relative abundance of the mock-community samples.
# The ten taxa with the largest summed abundance keep their species name in a
# new `species20` tax-table column (HTML-italicised for the legend); every
# other taxon is pooled as "Other".
a <- tax_table(subset_samples(phyloseq$phyloseq_rel,
                              sample_type == "Mock")) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant taxa among samples with S.obs != 0.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
                                                sample_type == "Mock") %>%
                                   subset_samples(., S.obs != 0)) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "Species"])
    # Column 9 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the tax table had 8 columns before cbind()).
    .[, 9] <- gsub("s__", " ", .[, 9])
    .[, 9] <- gsub("_", " ", .[, 9])
    .[, 9] <- gsub("[]]|[[]", "", .[, 9])
    # Close the italics in front of a stand-alone "sp" token and add the
    # period. The look-ahead leaves epithets that merely start with "sp"
    # (e.g. "sputigena") untouched; the former two-step replacement turned
    # them into "</i> sp.utigena".
    .[, 9] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 9], perl = TRUE)
    .[, 9] <- gsub(" group", "</i> group.", .[, 9])
    .[, 9] <- ifelse(grepl("Other", .[, 9]),
                     "Other",
                     ifelse(grepl("</i>", .[, 9]),
                            paste("<i>", .[, 9], sep = ""),
                            paste("<i>", .[, 9], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
                                    sample_type == "Mock") %>%
      subset_samples(., S.obs != 0)
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  my_plot_bar(., fill = "species20") +
  xlab("Subject") +
  ylab("") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(size = 6),
        legend.key.size = unit(3, "mm"),
        legend.title = element_text(size = 6),
        axis.text.x = element_text(color = "white")) +
  guides(fill = guide_legend(title = "Top 10 species")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  ggtitle("D Mock")
#facet_wrap (~ factor(sample_type, levels = c("Mock", "BAL", "Nasal", "Sputum")),
# scales= "free_x", nrow=2) +
# Fig. 2E - species-level relative abundance of the negative controls
# (sample `20220606_Neg` excluded, NA abundances set to 0).
# The ten taxa with the largest summed abundance keep their species name in a
# new `species20` tax-table column (HTML-italicised for the legend); every
# other taxon is pooled as "Other".
b <- tax_table(subset_samples(phyloseq$phyloseq_rel,
                              sample_type == "Neg.") %>%
                 subset_samples(.,
                                baylor_other_id != "20220606_Neg") %>%
                 transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant taxa in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
                                                sample_type == "Neg.") %>%
                                   subset_samples(.,
                                                  baylor_other_id != "20220606_Neg") %>%
                                   transform_sample_counts(.,
                                                           function(x) {ifelse(is.na(x),
                                                                               0,
                                                                               x)})
    ) %>%
      data.frame %>%
      arrange(-.) %>%
      row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "Species"])
    # Column 9 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the tax table had 8 columns before cbind()).
    .[, 9] <- gsub("s__", " ", .[, 9])
    .[, 9] <- gsub("_", " ", .[, 9])
    .[, 9] <- gsub("[]]|[[]", "", .[, 9])
    # Close the italics in front of a stand-alone "sp" token and add the
    # period. The look-ahead leaves epithets that merely start with "sp"
    # (e.g. "sputigena") untouched; the former two-step replacement turned
    # them into "</i> sp.utigena".
    .[, 9] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 9], perl = TRUE)
    .[, 9] <- gsub(" group", "</i> group.", .[, 9])
    .[, 9] <- ifelse(grepl("Other", .[, 9]),
                     "Other",
                     ifelse(grepl("</i>", .[, 9]),
                            paste("<i>", .[, 9], sep = ""),
                            paste("<i>", .[, 9], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
                                    sample_type == "Neg.") %>%
      subset_samples(.,
                     baylor_other_id != "20220606_Neg") %>%
      transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  my_plot_bar(., fill = "species20") +
  ylab("") +
  xlab("Subject") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(size = 6),
        legend.key.size = unit(3, "mm"),
        legend.title = element_text(size = 6),
        axis.text.x = element_text(color = "white")) +
  guides(fill = guide_legend(title = "Top 10 species")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment,
             scales = "free_x", nrow = 1) +
  ggtitle("E Negative control")
# Fig. 2A - species-level relative abundance of the BAL samples (NA
# abundances set to 0; samples without reads are kept and plot as empty bars).
# The ten taxa with the largest summed abundance keep their species name in a
# new `species20` tax-table column (HTML-italicised for the legend); every
# other taxon is pooled as "Other".
c <- tax_table(subset_samples(phyloseq$phyloseq_rel,
                              sample_type == "BAL") %>%
                 transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant taxa in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
                                                sample_type == "BAL") %>%
                                   transform_sample_counts(.,
                                                           function(x) {ifelse(is.na(x),
                                                                               0,
                                                                               x)})
    ) %>%
      data.frame %>%
      arrange(-.) %>%
      row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "Species"])
    # Column 9 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the tax table had 8 columns before cbind()).
    .[, 9] <- gsub("s__", " ", .[, 9])
    .[, 9] <- gsub("_", " ", .[, 9])
    .[, 9] <- gsub("[]]|[[]", "", .[, 9])
    # Close the italics in front of a stand-alone "sp" token and add the
    # period. The look-ahead leaves epithets that merely start with "sp"
    # (e.g. "sputigena") untouched; the former two-step replacement turned
    # them into "</i> sp.utigena".
    .[, 9] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 9], perl = TRUE)
    .[, 9] <- gsub(" group", "</i> group.", .[, 9])
    .[, 9] <- ifelse(grepl("Other", .[, 9]),
                     "Other",
                     ifelse(grepl("</i>", .[, 9]),
                            paste("<i>", .[, 9], sep = ""),
                            paste("<i>", .[, 9], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
                                    sample_type == "BAL") %>%
      transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
    tax_table(phyloseq_temp) <- tax_table(.)
    # Replace the subject identifiers by the letters A-E for the x axis;
    # factor() requires exactly five distinct subject_id values here.
    sample_data(phyloseq_temp)$subject_id <- sample_data(phyloseq_temp)$subject_id %>%
      factor(labels = c("A", "B", "C", "D", "E"))
    phyloseq_temp
  } %>%
  my_plot_bar(., x = "subject_id", fill = "species20") +
  ylab("") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(size = 6),
        legend.key.size = unit(3, "mm"),
        legend.title = element_text(size = 6),
        axis.title.x = element_blank()) +
  guides(fill = guide_legend(title = "Top 10 species")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment,
             scales = "free_x", nrow = 1) +
  ggtitle("A BAL")
# Fig. 2B - species-level relative abundance of the nasal swabs (samples with
# S.obs == 0 removed).
# The ten taxa with the largest summed abundance keep their species name in a
# new `species20` tax-table column (HTML-italicised for the legend); every
# other taxon is pooled as "Other".
d <- tax_table(subset_samples(phyloseq$phyloseq_rel,
                              sample_type == "Nasal") %>%
                 subset_samples(., S.obs != 0)) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant taxa in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
                                                sample_type == "Nasal") %>%
                                   subset_samples(., S.obs != 0)) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "Species"])
    # Column 9 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the tax table had 8 columns before cbind()).
    .[, 9] <- gsub("s__", " ", .[, 9])
    .[, 9] <- gsub("_", " ", .[, 9])
    .[, 9] <- gsub("[]]|[[]", "", .[, 9])
    # Close the italics in front of a stand-alone "sp" token and add the
    # period. The look-ahead leaves epithets that merely start with "sp"
    # (e.g. "sputigena") untouched; the former two-step replacement turned
    # them into "</i> sp.utigena".
    .[, 9] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 9], perl = TRUE)
    .[, 9] <- gsub(" group", "</i> group.", .[, 9])
    .[, 9] <- ifelse(grepl("Other", .[, 9]),
                     "Other",
                     ifelse(grepl("</i>", .[, 9]),
                            paste("<i>", .[, 9], sep = ""),
                            paste("<i>", .[, 9], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
                                    sample_type == "Nasal") %>%
      subset_samples(., S.obs != 0)
    tax_table(phyloseq_temp) <- tax_table(.)
    # Replace the subject identifiers by the letters A-J for the x axis;
    # factor() requires exactly ten distinct subject_id values here. The
    # result is stored as character, not as a factor.
    sample_data(phyloseq_temp)$subject_id <- sample_data(phyloseq_temp)$subject_id %>%
      factor(labels = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")) %>%
      as.character()
    phyloseq_temp
  } %>%
  my_plot_bar(., x = "subject_id", fill = "species20") +
  ylab("") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(size = 6),
        legend.key.size = unit(3, "mm"),
        legend.title = element_text(size = 6),
        axis.title.x = element_blank()) +
  guides(fill = guide_legend(title = "Top 10 species")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  # scale_x_discrete(drop=) +
  facet_wrap(~ treatment, nrow = 1, drop = TRUE, scales = "free_x") +
  ggtitle("B Nasal swabs")
# Fig. 2C - species-level relative abundance of the sputum samples (samples
# with S.obs == 0 removed).
# The ten taxa with the largest summed abundance keep their species name in a
# new `species20` tax-table column (HTML-italicised for the legend); every
# other taxon is pooled as "Other".
e <- tax_table(subset_samples(phyloseq$phyloseq_rel,
                              sample_type == "Sputum") %>%
                 subset_samples(., S.obs != 0)) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant taxa in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
                                                sample_type == "Sputum") %>%
                                   subset_samples(., S.obs != 0)) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "Species"])
    # Column 9 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the tax table had 8 columns before cbind()).
    .[, 9] <- gsub("s__", " ", .[, 9])
    .[, 9] <- gsub("_", " ", .[, 9])
    .[, 9] <- gsub("[]]|[[]", "", .[, 9])
    # Close the italics in front of a stand-alone "sp" token and add the
    # period. The look-ahead leaves epithets that merely start with "sp"
    # (e.g. "sputigena") untouched; the former two-step replacement turned
    # them into "</i> sp.utigena".
    .[, 9] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 9], perl = TRUE)
    .[, 9] <- gsub(" group", "</i> group.", .[, 9])
    .[, 9] <- ifelse(grepl("Other", .[, 9]),
                     "Other",
                     ifelse(grepl("</i>", .[, 9]),
                            paste("<i>", .[, 9], sep = ""),
                            paste("<i>", .[, 9], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
                                    sample_type == "Sputum") %>%
      subset_samples(., S.obs != 0)
    tax_table(phyloseq_temp) <- tax_table(.)
    # Replace the subject identifiers by the letters A-E for the x axis;
    # factor() requires exactly five distinct subject_id values here.
    sample_data(phyloseq_temp)$subject_id <- sample_data(phyloseq_temp)$subject_id %>%
      factor(labels = c("A", "B", "C", "D", "E"))
    phyloseq_temp
  } %>%
  my_plot_bar(., x = "subject_id", fill = "species20") +
  ylab("") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(size = 6),
        legend.key.size = unit(3, "mm"),
        legend.title = element_text(size = 6),
        axis.title.x = element_blank()) +
  guides(fill = guide_legend(title = "Top 10 species")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  ggtitle("C Sputum")
# Assemble Fig. 2 as a 4 x 2 grid: each row holds one panel (BAL, nasal,
# sputum, mock) with its legend, extracted by lemon::g_legend(), in a narrower
# second column. The panels' built-in legends are switched off.
fig2 <- ggarrange(
  c, as_ggplot(lemon::g_legend(c)),
  d, as_ggplot(lemon::g_legend(d)),
  e, as_ggplot(lemon::g_legend(e)),
  a, as_ggplot(lemon::g_legend(a)),
  #b, b %>% lemon::g_legend() %>% as_ggplot,
  ncol = 2, nrow = 4, widths = c(3, 1),
  legend = "none",
  align = "hv"
)
# Display the figure with one shared y-axis label on the left.
annotate_figure(
  fig2,
  left = text_grob("Relative abundance", rot = 90, family = "sans", size = 11)
)
# Export Fig. 2 as a 180 mm x 220 mm PNG at 600 dpi.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/Figure2.png", # The directory you want to save the file in
    width = 180, # The width of the plot, in the unit given by `units` (mm)
    height = 220, # The height of the plot, in the unit given by `units` (mm)
    units = "mm",
    res = 600
) #fixing multiple page issue
# print() draws the figure on the open png device explicitly, so the export
# also works when the script is run with source(), where top-level values are
# not auto-printed.
print(
  annotate_figure(fig2,
                  left = text_grob("Relative abundance",
                                   rot = 90,
                                   family = "sans",
                                   size = 11)
  )
)
# alpha diversity plots
#ggarrange(f4ad, ggarrange(f4e, f4f, ncol = 2),
#          ncol = 1) # alpha diversity plots
dev.off()
## quartz_off_screen
## 2
** 3 Candidat and 2 Staphylococcus –> can be used as a reason why we should be doing shotgun sequencing (species level)
Dolosigranulum - reported to be present in the nose; Malassezia - fungi; [others] –> [Other]
Unfiltered
Predicted function barplot
This figure is not included in the main text as it has too many taxonomic groups.
# Pathway abundances rescaled per sample to a total of 1,000,000.
phyloseq$phyloseq_path_cpm <- transform_sample_counts(phyloseq$phyloseq_path_rpk, function(x) {x / sum(x) * 1000000})

# Pathway bar plot, panel D - mock-community samples (sample `20220606_Neg`
# excluded, NA abundances set to 0). The ten pathways with the largest summed
# abundance keep their name in a new `species20` tax-table column; every other
# pathway is pooled as "Other".
a <- tax_table(subset_samples(phyloseq$phyloseq_path_cpm,
                              sample_type == "Mock") %>%
                 subset_samples(.,
                                baylor_other_id != "20220606_Neg") %>%
                 transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant pathways in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_path_cpm,
                                                sample_type == "Mock") %>%
                                   subset_samples(.,
                                                  baylor_other_id != "20220606_Neg") %>%
                                   transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "pathway"])
    # Column 3 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the pathway table had 2 columns before cbind()).
    .[, 3] <- gsub("s__", " ", .[, 3])
    .[, 3] <- gsub("_", " ", .[, 3])
    .[, 3] <- gsub("[]]|[[]", "", .[, 3])
    # Only a stand-alone "sp" token is rewritten. The look-ahead leaves words
    # that merely start with "sp" (e.g. "spermidine") untouched; the former
    # two-step replacement turned them into "</i> sp.ermidine".
    .[, 3] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 3], perl = TRUE)
    .[, 3] <- gsub(" group", "</i> group.", .[, 3])
    .[, 3] <- ifelse(grepl("Other", .[, 3]),
                     "Other",
                     ifelse(grepl("</i>", .[, 3]),
                            paste("<i>", .[, 3], sep = ""),
                            paste("<i>", .[, 3], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_path_cpm,
                                    sample_type == "Mock") %>%
      subset_samples(.,
                     baylor_other_id != "20220606_Neg") %>%
      transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  my_plot_bar(., fill = "species20") +
  ylab("Relative abundance") + # typo "abundancne" fixed
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(),
        axis.text.x = element_text(size = 0)) +
  # The fill shows pathways, so the legend is titled accordingly.
  guides(fill = guide_legend(title = "Top 10 pathways")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  labs(tag = "D") +
  ggtitle("Mock")
# Pathway bar plot, panel E - negative controls (sample `20220606_Neg`
# excluded, NA abundances set to 0). The ten pathways with the largest summed
# abundance keep their name in a new `species20` tax-table column; every other
# pathway is pooled as "Other".
b <- tax_table(subset_samples(phyloseq$phyloseq_path_cpm,
                              sample_type == "Neg.") %>%
                 subset_samples(.,
                                baylor_other_id != "20220606_Neg") %>%
                 transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant pathways in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_path_cpm,
                                                sample_type == "Neg.") %>%
                                   subset_samples(.,
                                                  baylor_other_id != "20220606_Neg") %>%
                                   transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "pathway"])
    # Column 3 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the pathway table had 2 columns before cbind()).
    .[, 3] <- gsub("s__", " ", .[, 3])
    .[, 3] <- gsub("_", " ", .[, 3])
    .[, 3] <- gsub("[]]|[[]", "", .[, 3])
    # Only a stand-alone "sp" token is rewritten. The look-ahead leaves words
    # that merely start with "sp" (e.g. "spermidine") untouched; the former
    # two-step replacement turned them into "</i> sp.ermidine".
    .[, 3] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 3], perl = TRUE)
    .[, 3] <- gsub(" group", "</i> group.", .[, 3])
    .[, 3] <- ifelse(grepl("Other", .[, 3]),
                     "Other",
                     ifelse(grepl("</i>", .[, 3]),
                            paste("<i>", .[, 3], sep = ""),
                            paste("<i>", .[, 3], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_path_cpm,
                                    sample_type == "Neg.") %>%
      subset_samples(.,
                     baylor_other_id != "20220606_Neg") %>%
      transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  my_plot_bar(., fill = "species20") +
  ylab("Relative abundance") + # typo "abundancne" fixed
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(),
        axis.text.x = element_text(size = 0)) +
  # The fill shows pathways, so the legend is titled accordingly.
  guides(fill = guide_legend(title = "Top 10 pathways")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  labs(tag = "E") +
  ggtitle("Negative controls")
# Pathway bar plot, panel A - BAL samples (sample `20220606_Neg` excluded, NA
# abundances set to 0). The ten pathways with the largest summed abundance
# keep their name in a new `species20` tax-table column; every other pathway
# is pooled as "Other".
c <- tax_table(subset_samples(phyloseq$phyloseq_path_cpm,
                              sample_type == "BAL") %>%
                 subset_samples(.,
                                baylor_other_id != "20220606_Neg") %>%
                 transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant pathways in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_path_cpm,
                                                sample_type == "BAL") %>%
                                   subset_samples(.,
                                                  baylor_other_id != "20220606_Neg") %>%
                                   transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "pathway"])
    # Column 3 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the pathway table had 2 columns before cbind()).
    .[, 3] <- gsub("s__", " ", .[, 3])
    .[, 3] <- gsub("_", " ", .[, 3])
    .[, 3] <- gsub("[]]|[[]", "", .[, 3])
    # Only a stand-alone "sp" token is rewritten. The look-ahead leaves words
    # that merely start with "sp" (e.g. "spermidine") untouched; the former
    # two-step replacement turned them into "</i> sp.ermidine".
    .[, 3] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 3], perl = TRUE)
    .[, 3] <- gsub(" group", "</i> group.", .[, 3])
    .[, 3] <- ifelse(grepl("Other", .[, 3]),
                     "Other",
                     ifelse(grepl("</i>", .[, 3]),
                            paste("<i>", .[, 3], sep = ""),
                            paste("<i>", .[, 3], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_path_cpm,
                                    sample_type == "BAL") %>%
      subset_samples(.,
                     baylor_other_id != "20220606_Neg") %>%
      transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  my_plot_bar(., fill = "species20") +
  ylab("Relative abundance") + # typo "abundancne" fixed
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(), axis.text.x = element_blank()) +
  # The fill shows pathways, so the legend is titled accordingly.
  guides(fill = guide_legend(title = "Top 10 pathways")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  labs(tag = "A") +
  ggtitle("BAL")
# Pathway bar plot, panel B - nasal swabs (sample `20220606_Neg` excluded, NA
# abundances set to 0). The ten pathways with the largest summed abundance
# keep their name in a new `species20` tax-table column; every other pathway
# is pooled as "Other".
d <- tax_table(subset_samples(phyloseq$phyloseq_path_cpm,
                              sample_type == "Nasal") %>%
                 subset_samples(.,
                                baylor_other_id != "20220606_Neg") %>%
                 transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant pathways in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_path_cpm,
                                                sample_type == "Nasal") %>%
                                   subset_samples(.,
                                                  baylor_other_id != "20220606_Neg") %>%
                                   transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "pathway"])
    # Column 3 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the pathway table had 2 columns before cbind()).
    .[, 3] <- gsub("s__", " ", .[, 3])
    .[, 3] <- gsub("_", " ", .[, 3])
    .[, 3] <- gsub("[]]|[[]", "", .[, 3])
    # Only a stand-alone "sp" token is rewritten. The look-ahead leaves words
    # that merely start with "sp" (e.g. "spermidine") untouched; the former
    # two-step replacement turned them into "</i> sp.ermidine".
    .[, 3] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 3], perl = TRUE)
    .[, 3] <- gsub(" group", "</i> group.", .[, 3])
    .[, 3] <- ifelse(grepl("Other", .[, 3]),
                     "Other",
                     ifelse(grepl("</i>", .[, 3]),
                            paste("<i>", .[, 3], sep = ""),
                            paste("<i>", .[, 3], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_path_cpm,
                                    sample_type == "Nasal") %>%
      subset_samples(.,
                     baylor_other_id != "20220606_Neg") %>%
      transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  my_plot_bar(., fill = "species20") +
  ylab("Relative abundance") + # typo "abundancne" fixed
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(), axis.text.x = element_blank()) +
  # The fill shows pathways, so the legend is titled accordingly.
  guides(fill = guide_legend(title = "Top 10 pathways")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  labs(tag = "B") +
  ggtitle("Nasal swabs")
# Pathway bar plot, panel C - sputum samples (sample `20220606_Neg` excluded,
# NA abundances set to 0). The ten pathways with the largest summed abundance
# keep their name in a new `species20` tax-table column; every other pathway
# is pooled as "Other".
e <- tax_table(subset_samples(phyloseq$phyloseq_path_cpm,
                              sample_type == "Sputum") %>%
                 subset_samples(.,
                                baylor_other_id != "20220606_Neg") %>%
                 transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})) %>%
  cbind(species20 = "[Other]") %>%
  {
    # Row names of the ten most abundant pathways in the same sample subset.
    top10_taxa <- head(taxa_sums(subset_samples(phyloseq$phyloseq_path_cpm,
                                                sample_type == "Sputum") %>%
                                   subset_samples(.,
                                                  baylor_other_id != "20220606_Neg") %>%
                                   transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})) %>%
                         data.frame %>%
                         arrange(-.) %>%
                         row.names(), 10)
    .[top10_taxa, "species20"] <- as.character(.[top10_taxa, "pathway"])
    # Column 3 is expected to be the `species20` column appended above
    # (TODO confirm: assumes the pathway table had 2 columns before cbind()).
    .[, 3] <- gsub("s__", " ", .[, 3])
    .[, 3] <- gsub("_", " ", .[, 3])
    .[, 3] <- gsub("[]]|[[]", "", .[, 3])
    # Only a stand-alone "sp" token is rewritten. The look-ahead leaves words
    # that merely start with "sp" (e.g. "spermidine") untouched; the former
    # two-step replacement turned them into "</i> sp.ermidine".
    .[, 3] <- gsub(" sp(?![a-z])", "</i> sp.", .[, 3], perl = TRUE)
    .[, 3] <- gsub(" group", "</i> group.", .[, 3])
    .[, 3] <- ifelse(grepl("Other", .[, 3]),
                     "Other",
                     ifelse(grepl("</i>", .[, 3]),
                            paste("<i>", .[, 3], sep = ""),
                            paste("<i>", .[, 3], "</i>", sep = "")) %>%
                       gsub("s__", "", .) %>%
                       gsub("_", " ", .)
    )
    # Fix: the plotted object was subset with sample_type == "Neg.", so the
    # panel titled "Sputum" showed the negative controls, labelled with the
    # top pathways selected from the sputum samples.
    phyloseq_temp <- subset_samples(phyloseq$phyloseq_path_cpm,
                                    sample_type == "Sputum") %>%
      subset_samples(.,
                     baylor_other_id != "20220606_Neg") %>%
      transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)})
    tax_table(phyloseq_temp) <- tax_table(.)
    phyloseq_temp
  } %>%
  my_plot_bar(., fill = "species20") +
  ylab("Relative abundance") + # typo "abundancne" fixed
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(), axis.text.x = element_blank()) +
  # The fill shows pathways, so the legend is titled accordingly.
  guides(fill = guide_legend(title = "Top 10 pathways")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  labs(tag = "C") +
  ggtitle("Sputum")

# Display the five pathway panels one after another.
a
b
c
d
e
v. Was there any bias in the mock community?
Gram-stain analysis
Fig. S5. Bar plot of gram-stain
Fig. S5. Bar plot annotated with gram-stain information of (A) BAL, (B) nasal swabs, (C) sputum, and (D) mock communities after each treatment.
# Fig. S5D - relative abundance of the mock-community samples (samples with
# S.obs == 0 removed), coloured by the `gram_stain` column of the tax table.
# The former top-10 `species20` label computation was removed: this plot fills
# by `gram_stain`, so that column was never displayed.
a <- subset_samples(phyloseq$phyloseq_rel,
                    sample_type == "Mock") %>%
  subset_samples(., S.obs != 0) %>%
  my_plot_bar(., fill = "gram_stain") +
  ylab("") +
  xlab("Subject") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(),
        axis.title.y = element_blank(),
        axis.text.x = element_blank()) +
  guides(fill = guide_legend(title = "Gram-stain")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment, scales = "free_x", nrow = 1) +
  ggtitle("D Mock")
# Fig. S5E - relative abundance of the negative controls (sample
# `20220606_Neg` excluded, NA abundances set to 0), coloured by the
# `gram_stain` column of the tax table.
# The former top-10 `species20` label computation was removed: this plot fills
# by `gram_stain`, so that column was never displayed.
b <- subset_samples(phyloseq$phyloseq_rel,
                    sample_type == "Neg.") %>%
  subset_samples(.,
                 baylor_other_id != "20220606_Neg") %>%
  transform_sample_counts(., function(x) {ifelse(is.na(x), 0, x)}) %>%
  my_plot_bar(., fill = "gram_stain") +
  ylab("") +
  xlab("Subject") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(),
        axis.text.x = element_blank()) +
  guides(fill = guide_legend(title = "Gram-stain")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  facet_wrap(~ treatment,
             scales = "free_x", nrow = 1) +
  ggtitle("E Negative controls")
# Panel "A BAL": bar plot of the BAL samples with subject_id on the x axis,
# filled by gram_stain and faceted by treatment.
# Steps: (1) take the tax_table of the BAL samples (NA abundances replaced by
# 0), (2) append a `species20` label column, (3) write the edited tax_table
# onto an identically subset phyloseq object and relabel subject_id as A-E,
# (4) plot that object with my_plot_bar().
# NOTE(review): the name `c` shadows base::c as a variable; calls such as c(...)
# still resolve to the function, but a more descriptive name would be safer.
c <- tax_table(subset_samples(phyloseq$phyloseq_rel,
sample_type == "BAL") %>%
transform_sample_counts(., function(x){ifelse(is.na(x), 0, x)})
) %>%
cbind(species20 = "[Other]") %>%
# Inside the braces `.` is the tax_table matrix with the extra species20 column.
# top20species holds the row names of the 10 taxa with the largest taxa_sums()
# in the BAL subset (the name says 20, but head(..., 10) keeps 10).
{top20species <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
sample_type == "BAL") %>%
transform_sample_counts(.,
function(x){ifelse(is.na(x),
0,
x)})
) %>%
data.frame %>%
arrange(-.) %>%
row.names(), 10)
# Top taxa get their Species name as label; every other row keeps "[Other]".
.[top20species, "species20"] <- as.character(.[top20species, "Species"])
# Clean-up and markdown italics for column 9 -- presumably the species20 column
# added above (assumes the original tax_table has 8 columns; TODO confirm).
# NOTE(review): the pattern " sp" also matches epithets that merely start with
# "sp", and the "." in " sp." is a regex wildcard -- verify the labels.
.[, 9] <- gsub("s__", " ", .[, 9])
.[, 9] <- gsub("_", " ", .[, 9])
.[, 9] <- gsub("[]]|[[]", "", .[, 9])
.[, 9] <- gsub(" sp", " sp.", .[, 9])
.[, 9] <- gsub(" sp.", "</i> sp.", .[, 9])
.[, 9] <- gsub(" group", "</i> group.", .[, 9])
# "Other" stays plain text; all other labels are wrapped in <i>...</i>.
.[, 9] <- ifelse(grepl("Other",.[, 9]),
"Other",
ifelse(grepl("</i>", .[, 9]),
paste("<i>", .[, 9], sep = ""),
paste("<i>", .[, 9], "</i>", sep = "")) %>%
gsub("s__", "", .) %>%
gsub("_", " ", .)
)
# Rebuild the BAL subset and attach the edited taxonomy table to it.
phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
sample_type == "BAL") %>%
transform_sample_counts(., function(x){ifelse(is.na(x), 0, x)})
tax_table(phyloseq_temp) <- tax_table(.)
# Replace subject identifiers by the letters A-E for display.
# assumes exactly five distinct subject_id values in this subset -- TODO confirm
sample_data(phyloseq_temp)$subject_id <- sample_data(phyloseq_temp)$subject_id %>%
factor(labels = c("A", "B", "C", "D", "E"))
# Value of the braces: the phyloseq object handed to my_plot_bar().
phyloseq_temp
} %>%
# my_plot_bar() is defined elsewhere in the file; the fill is gram_stain, so
# species20 is not referenced in this call (NOTE(review): confirm it is used).
my_plot_bar(., x = "subject_id", fill="gram_stain") +
ylab("") +
theme_classic(base_size = 11, base_family = "sans") +
theme(legend.text = element_markdown(),
axis.title.y = element_blank(),
axis.title.x = element_blank()) +
guides(fill=guide_legend(title="Gram-stain")) +
scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
#facet_wrap (~ treatment, scales = "free_x", nrow = 1) +
facet_wrap ( ~ treatment,
scales= "free_x", nrow=1) +
ggtitle("A BAL")
# Panel "B Nasal swabs": bar plot of the nasal samples with subject_id on the
# x axis, filled by gram_stain and faceted by treatment.
# Steps: (1) take the tax_table of the Nasal samples with S.obs != 0,
# (2) append a `species20` label column, (3) write the edited tax_table onto an
# identically subset phyloseq object and relabel subject_id as A-J,
# (4) plot that object with my_plot_bar().
# Changes versus the previous version: facet_wrap() now spells out `scales`
# (it relied on partial matching of `scale`) and uses TRUE instead of T.
d <- tax_table(subset_samples(phyloseq$phyloseq_rel,
                              sample_type == "Nasal") %>%
                 subset_samples(., S.obs != 0)) %>%
  cbind(species20 = "[Other]") %>%
  # Inside the braces `.` is the tax_table matrix with the species20 column.
  # top20species holds the row names of the 10 taxa with the largest
  # taxa_sums() in the same subset (the name says 20, head(..., 10) keeps 10).
  {top20species <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
                                                  sample_type == "Nasal") %>%
                                    subset_samples(., S.obs != 0)) %>%
                          data.frame %>%
                          arrange(-.) %>%
                          row.names(), 10)
  # Top taxa get their Species name as label; other rows keep "[Other]".
  .[top20species, "species20"] <- as.character(.[top20species, "Species"])
  # Clean-up and markdown italics for column 9 -- presumably the species20
  # column added above (assumes an 8-column tax_table; TODO confirm).
  # NOTE(review): the pattern " sp" also matches epithets that merely start
  # with "sp", and the "." in " sp." is a regex wildcard -- verify the labels.
  .[, 9] <- gsub("s__", " ", .[, 9])
  .[, 9] <- gsub("_", " ", .[, 9])
  .[, 9] <- gsub("[]]|[[]", "", .[, 9])
  .[, 9] <- gsub(" sp", " sp.", .[, 9])
  .[, 9] <- gsub(" sp.", "</i> sp.", .[, 9])
  .[, 9] <- gsub(" group", "</i> group.", .[, 9])
  # "Other" stays plain text; all other labels are wrapped in <i>...</i>.
  .[, 9] <- ifelse(grepl("Other", .[, 9]),
                   "Other",
                   ifelse(grepl("</i>", .[, 9]),
                          paste("<i>", .[, 9], sep = ""),
                          paste("<i>", .[, 9], "</i>", sep = "")) %>%
                     gsub("s__", "", .) %>%
                     gsub("_", " ", .)
  )
  # Rebuild the nasal subset and attach the edited taxonomy table to it.
  phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
                                  sample_type == "Nasal") %>%
    subset_samples(., S.obs != 0)
  tax_table(phyloseq_temp) <- tax_table(.)
  # Replace subject identifiers by the letters A-J for display.
  # assumes exactly ten distinct subject_id values here -- TODO confirm
  sample_data(phyloseq_temp)$subject_id <- sample_data(phyloseq_temp)$subject_id %>%
    factor(labels = c("A", "B", "C", "D", "E", "F", "G", "H", "I", "J")) %>%
    as.character()
  # Value of the braces: the phyloseq object handed to my_plot_bar().
  phyloseq_temp
  } %>%
  # my_plot_bar() is defined elsewhere in the file.
  my_plot_bar(., x = "subject_id", fill = "gram_stain") +
  theme_classic(base_size = 11, base_family = "sans") +
  theme(legend.text = element_markdown(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  guides(fill = guide_legend(title = "Gram-stain")) +
  scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
  # One facet per treatment; each facet only shows the subjects it contains.
  facet_wrap(~ treatment, nrow = 1, drop = TRUE, scales = "free_x") +
  ggtitle("B Nasal swabs")
# Panel "C Sputum": bar plot of the sputum samples with subject_id on the
# x axis, filled by gram_stain and faceted by treatment.
# Steps: (1) take the tax_table of the Sputum samples with S.obs != 0,
# (2) append a `species20` label column, (3) write the edited tax_table onto an
# identically subset phyloseq object and relabel subject_id as A-E,
# (4) plot that object with my_plot_bar().
e <- tax_table(subset_samples(phyloseq$phyloseq_rel,
sample_type == "Sputum") %>%
subset_samples(., S.obs != 0)) %>%
cbind(species20 = "[Other]") %>%
# Inside the braces `.` is the tax_table matrix with the extra species20 column.
# top20species holds the row names of the 10 taxa with the largest taxa_sums()
# in the same subset (the name says 20, but head(..., 10) keeps 10).
{top20species <- head(taxa_sums(subset_samples(phyloseq$phyloseq_rel,
sample_type == "Sputum") %>%
subset_samples(., S.obs != 0)) %>%
data.frame %>%
arrange(-.) %>%
row.names(), 10)
# Top taxa get their Species name as label; every other row keeps "[Other]".
.[top20species, "species20"] <- as.character(.[top20species, "Species"])
# Clean-up and markdown italics for column 9 -- presumably the species20 column
# added above (assumes the original tax_table has 8 columns; TODO confirm).
# NOTE(review): the pattern " sp" also matches epithets that merely start with
# "sp", and the "." in " sp." is a regex wildcard -- verify the labels.
.[, 9] <- gsub("s__", " ", .[, 9])
.[, 9] <- gsub("_", " ", .[, 9])
.[, 9] <- gsub("[]]|[[]", "", .[, 9])
.[, 9] <- gsub(" sp", " sp.", .[, 9])
.[, 9] <- gsub(" sp.", "</i> sp.", .[, 9])
.[, 9] <- gsub(" group", "</i> group.", .[, 9])
# "Other" stays plain text; all other labels are wrapped in <i>...</i>.
.[, 9] <- ifelse(grepl("Other",.[, 9]),
"Other",
ifelse(grepl("</i>", .[, 9]),
paste("<i>", .[, 9], sep = ""),
paste("<i>", .[, 9], "</i>", sep = "")) %>%
gsub("s__", "", .) %>%
gsub("_", " ", .)
)
# Rebuild the sputum subset and attach the edited taxonomy table to it.
phyloseq_temp <- subset_samples(phyloseq$phyloseq_rel,
sample_type == "Sputum") %>%
subset_samples(., S.obs != 0)
tax_table(phyloseq_temp) <- tax_table(.)
# Replace subject identifiers by the letters A-E for display.
# assumes exactly five distinct subject_id values in this subset -- TODO confirm
sample_data(phyloseq_temp)$subject_id <- sample_data(phyloseq_temp)$subject_id %>%
factor(labels = c("A", "B", "C", "D", "E"))
# Value of the braces: the phyloseq object handed to my_plot_bar().
phyloseq_temp
} %>%
# my_plot_bar() is defined elsewhere in the file; the fill is gram_stain, so
# species20 is not referenced in this call (NOTE(review): confirm it is used).
my_plot_bar(., x = "subject_id", fill="gram_stain")+
theme_classic(base_size = 11, base_family = "sans") +
theme(legend.text = element_markdown(),
axis.title.y = element_blank(),
axis.title.x = element_blank()) +
guides(fill=guide_legend(title="Gram-stain")) +
#scale_x_discrete(drop=F) +
scale_fill_manual(values = c(RColorBrewer::brewer.pal(n = 11, name = "Paired"))) +
facet_wrap(~ treatment, scales= "free_x", nrow = 1) +
#facet_wrap (~ factor(sample_type, levels = c("Mock", "BAL", "Nasal", "Sputum")),
# scales= "free_x", nrow=2) +
ggtitle("C Sputum")
# Figure S5: stack the BAL (c), nasal (d), sputum (e) and mock (a) panels in
# one column with a single shared legend on top.
figS5 <- ggarrange(c, d, e, a, ncol = 1, common.legend = TRUE, legend = "top")
figS5

# Add the shared y-axis title once and reuse the annotated figure for both the
# on-screen display and the PNG export.
figS5_annotated <- annotate_figure(
  figS5,
  left = text_grob("Relative abundance",
                   rot = 90,
                   family = "sans",
                   size = 11)
)
figS5_annotated

# Export at 180 mm x 200 mm, 600 dpi. width/height are in millimetres because
# units = "mm".
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS5.png",
    width = 180,
    height = 200,
    units = "mm",
    res = 600
)
# Explicit print(): the figure is drawn on the png device even when this code
# is run without top-level auto-printing (e.g. via source()).
print(figS5_annotated)
dev.off()
## quartz_off_screen
## 2
Gram-stain stats (all samples)
Effect size, standard error (SE) and p-value at a statistical test on gram-negative proportion using linear mixed effect model. lmer( Gram-negative proportion vs sample_type + treatment + sample_type * treatment + (1|subject_id) )
Interaction term was significant (p-value = 0.018771)
# ANOVA table of the mixed model for gram-negative proportion (sample type,
# treatment, their interaction, random intercept per subject), restricted to
# the BAL, Nasal and Sputum samples.
# `sample_data` is used here as a data object, not as the phyloseq accessor;
# it is assigned from sample_data(phyloseq$phyloseq_count) elsewhere in the file.
anova(
  lmer(
    gram_neg_prop ~ sample_type + treatment + sample_type * treatment + (1 | subject_id),
    data = subset(data.frame(sample_data),
                  sample_type %in% c("BAL", "Nasal", "Sputum"))
  )
)
Table S3. Gram-stain stats -stratified
Stratified analysis
Table S3. Effect size, standard error (SE) and p-value at a
statistical test on gram-negative proportion using linear mixed effect
model. LMER(Gram-negative proportion vs sample type + treatment + sample
type * treatment + (1|subject id)). Stratified analyses were conducted
for each sample type as an interaction term of sample type and treatment
was significant at an ANOVA test (p-value < 0.001) using a model,
LMER (Gram-negative proportion ~ sample type + treatment + sample type *
treatment + (1|subject_id) ). The baseline of categorical variables is
untreated BAL, and statistical significances were noted with
*: p-value < 0.05 and ***: p-value < 0.001.
# Mock community: linear model of gram-negative proportion on treatment
# (lm(), no random effect), formatted as effect size with a Wald-type 95%
# interval (estimate +/- 1.96 * SE), rounded p-value and significance stars.
tableS5_mock <- lm(gram_neg_prop ~ treatment,
                   data = subset(data.frame(sample_data(phyloseq$phyloseq_rel)),
                                 sample_type %in% c("Mock"))) %>%
  summary() %>%
  .[["coefficients"]] %>%
  data.frame(check.names = FALSE) %>%
  # Term names are edited as an ordinary column and restored as row names last.
  rownames_to_column(var = "x") %>%
  mutate(
    x = gsub(":", " * ", gsub("sample_type|treatment", "", x)),
    # Stars are derived from the unrounded p-value.
    ` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                    abs(`Pr(>|t|)`) < 0.01 ~ "**",
                    abs(`Pr(>|t|)`) < 0.05 ~ "*",
                    .default = " "),
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 2), nsmall = 2),
      " (",
      format(round(Estimate - 1.96 * `Std. Error`, 2), nsmall = 2),
      ", ",
      format(round(Estimate + 1.96 * `Std. Error`, 2), nsmall = 2),
      ")"
    ),
    `<i>p</i>-value` = round(`Pr(>|t|)`, 3)
  ) %>%
  dplyr::select(c("x", "Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  column_to_rownames(var = "x")
# BAL: mixed model of gram-negative proportion on treatment with a random
# intercept per subject, formatted as effect size with a Wald-type 95%
# interval (estimate +/- 1.96 * SE), rounded p-value and significance stars.
tableS5_bal <- lmer(gram_neg_prop ~ treatment + (1 | subject_id),
                    data = subset(data.frame(sample_data),
                                  sample_type %in% c("BAL"))) %>%
  summary() %>%
  .[["coefficients"]] %>%
  data.frame(check.names = FALSE) %>%
  # Term names are edited as an ordinary column and restored as row names last.
  rownames_to_column(var = "x") %>%
  mutate(
    x = gsub(":", " * ", gsub("sample_type|treatment", "", x)),
    # Stars are derived from the unrounded p-value.
    ` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                    abs(`Pr(>|t|)`) < 0.01 ~ "**",
                    abs(`Pr(>|t|)`) < 0.05 ~ "*",
                    .default = " "),
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 2), nsmall = 2),
      " (",
      format(round(Estimate - 1.96 * `Std. Error`, 2), nsmall = 2),
      ", ",
      format(round(Estimate + 1.96 * `Std. Error`, 2), nsmall = 2),
      ")"
    ),
    `<i>p</i>-value` = round(`Pr(>|t|)`, 3)
  ) %>%
  dplyr::select(c("x", "Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  column_to_rownames(var = "x")
# Nasal swabs: mixed model of gram-negative proportion on treatment with a
# random intercept per subject, formatted as effect size with a Wald-type 95%
# interval (estimate +/- 1.96 * SE), rounded p-value and significance stars.
tableS5_ns <- lmer(gram_neg_prop ~ treatment + (1 | subject_id),
                   data = subset(data.frame(sample_data),
                                 sample_type %in% c("Nasal"))) %>%
  summary() %>%
  .[["coefficients"]] %>%
  data.frame(check.names = FALSE) %>%
  # Term names are edited as an ordinary column and restored as row names last.
  rownames_to_column(var = "x") %>%
  mutate(
    x = gsub(":", " * ", gsub("sample_type|treatment", "", x)),
    # Stars are derived from the unrounded p-value.
    ` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                    abs(`Pr(>|t|)`) < 0.01 ~ "**",
                    abs(`Pr(>|t|)`) < 0.05 ~ "*",
                    .default = " "),
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 2), nsmall = 2),
      " (",
      format(round(Estimate - 1.96 * `Std. Error`, 2), nsmall = 2),
      ", ",
      format(round(Estimate + 1.96 * `Std. Error`, 2), nsmall = 2),
      ")"
    ),
    `<i>p</i>-value` = round(`Pr(>|t|)`, 3)
  ) %>%
  dplyr::select(c("x", "Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  column_to_rownames(var = "x")
# Sputum: mixed model of gram-negative proportion on treatment with a random
# intercept per subject, formatted as effect size with a Wald-type 95%
# interval (estimate +/- 1.96 * SE), rounded p-value and significance stars.
tableS5_spt <- lmer(gram_neg_prop ~ treatment + (1 | subject_id),
                    data = subset(data.frame(sample_data),
                                  sample_type %in% c("Sputum"))) %>%
  summary() %>%
  .[["coefficients"]] %>%
  data.frame(check.names = FALSE) %>%
  # Term names are edited as an ordinary column and restored as row names last.
  rownames_to_column(var = "x") %>%
  mutate(
    x = gsub(":", " * ", gsub("sample_type|treatment", "", x)),
    # Stars are derived from the unrounded p-value.
    ` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                    abs(`Pr(>|t|)`) < 0.01 ~ "**",
                    abs(`Pr(>|t|)`) < 0.05 ~ "*",
                    .default = " "),
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 2), nsmall = 2),
      " (",
      format(round(Estimate - 1.96 * `Std. Error`, 2), nsmall = 2),
      ", ",
      format(round(Estimate + 1.96 * `Std. Error`, 2), nsmall = 2),
      ")"
    ),
    `<i>p</i>-value` = round(`Pr(>|t|)`, 3)
  ) %>%
  dplyr::select(c("x", "Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  column_to_rownames(var = "x")
# Combine the four stratified gram-stain tables side by side (three columns
# each) and render them as one HTML table with a spanning header per sample
# type. escape = FALSE keeps the <i>p</i> markup in the column names.
# Styling is applied once, after the header row is added (it was previously
# applied twice), matching how the species-richness table is built.
tableS3 <- cbind(tableS5_mock, tableS5_bal, tableS5_ns, tableS5_spt) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1, "Mock" = 3, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
tableS3
| Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | |||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 51.13 ( 46.93, 55.33) | 0 | *** | 32.85 ( -4.10, 69.80) | 0.123 | 0.84 (-2.60, 4.29) | 0.635 | 61.64 ( 51.14, 72.15) | 0 | *** | ||
| lyPMA | -23.52 (-29.75, -17.29) | 0 | *** | 6.42 (-23.21, 36.04) | 0.676 | 19.41 (13.83, 24.99) | 0.000 | *** | -40.92 (-53.57, -28.27) | 0 | *** | |
| Benzonase | -51.08 (-57.31, -44.85) | 0 | *** | 15.33 (-14.29, 44.95) | 0.322 | 1.89 (-3.70, 7.48) | 0.514 | -52.49 (-65.14, -39.85) | 0 | *** | ||
| Host zero | -51.13 (-57.36, -44.90) | 0 | *** | 8.61 (-21.01, 38.23) | 0.575 | 0.03 (-5.56, 5.61) | 0.993 | -59.87 (-72.52, -47.22) | 0 | *** | ||
| Molysis | -50.64 (-56.87, -44.40) | 0 | *** | -0.86 (-30.48, 28.76) | 0.955 | 2.28 (-3.31, 7.86) | 0.432 | -59.87 (-72.52, -47.22) | 0 | *** | ||
| QIAamp | -51.05 (-57.28, -44.82) | 0 | *** | 7.31 (-22.32, 36.93) | 0.634 | 0.13 (-5.45, 5.72) | 0.963 | -60.59 (-73.24, -47.95) | 0 | *** |
# Write the gram-stain table to a self-contained HTML file.
# NOTE(review): the object is `tableS3` (captioned "Table S3") but the file is
# named tableS4.html, while `tables4` is written to tableS3.html later in the
# file -- confirm the two file names are not swapped.
save_kable(tableS3, file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS4.html", self_contained = T)
*vi. Did diversity matrices change?
Taxa change
Fig. 3 Alpha and beta diversity
Fig. 3. Alpha and beta diversity by sample type and treatment method after removing potential contaminants and rare taxa. (A) Species richness with statistical test results (linear mixed effect model stratified by sample type), (B) Morisita-Horn distance between each treated sample and the untreated sample from the same subject; squares represent mean values and bars represent 95% confidence intervals.
- PSL comments (20230630): - Figure 3A: it is hard to see some of the
boxplot colors due to the narrow interquartile range. Can you use
stat_summary and geom = “pointrange” like what Maghini DG et al did for
their Figure 3b or create dotplot + pointrange geom like their Figure
3c? I found some example code by googling, though you might want to show
median + iqr rather than mean + sd stat_summary(fun = mean, geom =
“pointrange”, fun.max = function(x) mean(x) + sd(x), fun.min =
function(x) mean(x) - sd(x))
- Figure 3A: we also need to talk because the mock community is only supposed to have a species richness = 10. I know that some of this may be due to taxonomic misclassification but that immediately will raise issues with the reviewer so we need to anticipate how to address this in advance
- Figure 3A: add relevant p-values using horizontal bars and stars for significance like Maghini DG et al did for Figure 3B
- Figure 3A: why not just do what Maghini DG et al did for Figure 3e? But have one panel per sample type
# Figure 3A: species richness (S.obs) per treatment, one facet per sample type
# (Mock, BAL, Nasal, Sputum). Jittered points show individual samples; the
# pointrange shows mean_sdl with mult = 1 (mean +/- 1 SD).
# Change versus the previous version: the last legend label was misspelled
# "QIAaamp"; it is now "QIAamp", matching the treatment level name.
f3a <- sample_data(phyloseq$phyloseq_count) %>%
  data.frame() %>%
  subset(sample_type %in% c("Sputum", "Nasal", "BAL", "Mock")) %>%
  ggplot(aes(x = treatment, y = S.obs)) +
  geom_jitter(aes(color = treatment),
              position = position_jitter(0.2), size = 1.2) +
  stat_summary(aes(color = treatment),
               fun.data = "mean_sdl", fun.args = list(mult = 1),
               geom = "pointrange", size = 0.4) +
  ylab("Species richness") +
  xlab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  # Manual treatment colours (the same six hex codes are used for panel B).
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  labs(tag = "A") +
  # Treatment is conveyed by colour, so x tick labels are hidden.
  theme(plot.tag = element_text(size = 15),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "top") +
  facet_wrap(~sample_type, nrow = 1) +
  guides(col = guide_legend(nrow = 1))
# Significance labels drawn on top of Figure 3A: one row per
# sample type x treated group, with the star label and the y position (S.obs)
# at which it is placed. Rows are ordered sample type by sample type, with the
# five treatments in the same order inside each type.
dat_text <- data.frame(
  label = c(
    "", "***", "***", "**", "***",   # Mock
    "", "", "", "*", "",             # BAL
    "", "", "***", "*", "**",        # Nasal
    "**", "***", "***", "***", "***" # Sputum
  ),
  sample_type = rep(c("Mock", "BAL", "Nasal", "Sputum"), each = 5),
  treatment = rep(c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
                  times = 4),
  S.obs = c(
    50, 30, 35, 33, 31,     # Mock
    30, 35, 50, 52, 50,     # BAL
    30, 30, 30, 35, 30,     # Nasal
    100, 120, 147, 140, 125 # Sputum
  )
)
# Factor levels fix facet and axis order; "Untreated" is a level without rows.
dat_text$sample_type <- factor(
  dat_text$sample_type,
  levels = c("Mock", "BAL", "Nasal", "Sputum")
)
dat_text$treatment <- factor(
  dat_text$treatment,
  levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
)
# Overlay the significance stars from dat_text on Figure 3A, positioned by
# treatment (x) and S.obs (y) within the matching sample_type facet.
f3a <- f3a +
  geom_text(aes(x = treatment, y = S.obs, label = label), data = dat_text)
# Samples with at least one observed species, negative controls excluded.
phyloseq_rel_nz <- subset_samples(
  phyloseq$phyloseq_rel,
  S.obs != 0 & sample_type %in% c("Mock", "BAL", "Nasal", "Sputum")
)

# Pairwise "horn" distances between samples, reshaped to long format
# (columns iso1, iso2 and dist are used below).
horn_dist_long <- melt_dist(as.matrix(distance(phyloseq_rel_nz, method = "horn")))

# Sample names are split on "_" into at most three pieces; the first two
# pieces are pasted back together as the sample identifier.
names <- data.frame(str_split_fixed(horn_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(horn_dist_long$iso2, "_", 3))

# Identifier and depletion method of the first sample of each pair. The method
# is read from the sample name; anything unmatched is labelled "control".
horn_dist_long$sample_id_1 <- with(names, paste(X1, X2, sep = "_"))
horn_dist_long$method_1 <- case_when(
  grepl("lyPMA", horn_dist_long$iso1) ~ "lypma",
  grepl("ben", horn_dist_long$iso1) ~ "benzonase",
  grepl("host", horn_dist_long$iso1) ~ "host_zero",
  grepl("qia", horn_dist_long$iso1) ~ "qiaamp",
  grepl("moly", horn_dist_long$iso1) ~ "molysis",
  .default = "control"
)

# Same two columns for the second sample of each pair.
horn_dist_long$sample_id_2 <- with(names2, paste(X1, X2, sep = "_"))
horn_dist_long$method_2 <- case_when(
  grepl("lyPMA", horn_dist_long$iso2) ~ "lypma",
  grepl("ben", horn_dist_long$iso2) ~ "benzonase",
  grepl("host", horn_dist_long$iso2) ~ "host_zero",
  grepl("qia", horn_dist_long$iso2) ~ "qiaamp",
  grepl("moly", horn_dist_long$iso2) ~ "molysis",
  .default = "control"
)

# Collapse control identifiers: names containing "pos" become "Mock", names
# containing "neg" or "n_" become "Neg." (both case-insensitive); every other
# identifier is kept as is.
horn_dist_long$sample_id_1 <- case_when(
  grepl("pos", horn_dist_long$sample_id_1, ignore.case = TRUE) ~ "Mock",
  grepl("neg|n_", horn_dist_long$sample_id_1, ignore.case = TRUE) ~ "Neg.",
  .default = horn_dist_long$sample_id_1
)
horn_dist_long$sample_id_2 <- case_when(
  grepl("pos", horn_dist_long$sample_id_2, ignore.case = TRUE) ~ "Mock",
  grepl("neg|n_", horn_dist_long$sample_id_2, ignore.case = TRUE) ~ "Neg.",
  .default = horn_dist_long$sample_id_2
)
# Keep only the distances between an untreated ("control") sample and a
# treated sample that share the same sample identifier.
path_horn_dist_long_within_sampleid_from_control <- subset(
  horn_dist_long,
  sample_id_1 == sample_id_2 &                       # same identifier
    method_1 != method_2 &                           # two different methods
    (method_1 == "control" | method_2 == "control")  # one of them is the control
)

# The treatment of a pair is the method of its non-control member.
path_horn_dist_long_within_sampleid_from_control$treatment <- with(
  path_horn_dist_long_within_sampleid_from_control,
  ifelse(method_1 == "control", method_2, method_1)
)

# Sample type is inferred from the name of the first sample of the pair;
# names matching none of the patterns give NA.
path_horn_dist_long_within_sampleid_from_control$sample_type <- with(
  path_horn_dist_long_within_sampleid_from_control,
  case_when(
    grepl("NS", iso1) ~ "Nasal",
    grepl("CFB", iso1) ~ "Sputum",
    grepl("BAL", iso1) ~ "BAL",
    grepl("pos|POS", iso1, ignore.case = TRUE) ~ "Mock",
    grepl("neg|N_EXT", iso1) ~ "Neg."
  )
)

# dist_from: name of the control sample the distance is measured from.
path_horn_dist_long_within_sampleid_from_control$dist_from <- with(
  path_horn_dist_long_within_sampleid_from_control,
  case_when(method_1 == "control" ~ iso1,
            method_2 == "control" ~ iso2)
)
# Baseline rows: every control sample gets a distance of 0 to itself, labelled
# "Untreated", so the untreated group appears in the distance plot.
dummy <- data.frame(
  iso1 = unique(path_horn_dist_long_within_sampleid_from_control$dist_from),
  iso2 = unique(path_horn_dist_long_within_sampleid_from_control$dist_from),
  dist = 0,
  treatment = "Untreated",
  method_1 = "control",
  method_2 = "control"
)

# Rebuild the identifier columns the same way as for the real distances:
# first two "_"-separated pieces of the sample name.
names <- data.frame(str_split_fixed(dummy$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(dummy$iso2, "_", 3))
dummy$sample_id_1 <- with(names, paste(X1, X2, sep = "_"))
dummy$sample_id_2 <- with(names2, paste(X1, X2, sep = "_"))

# Collapse control identifiers to "Mock" / "Neg." (case-insensitive match).
dummy$sample_id_1 <- case_when(
  grepl("pos", dummy$sample_id_1, ignore.case = TRUE) ~ "Mock",
  grepl("neg|n_", dummy$sample_id_1, ignore.case = TRUE) ~ "Neg.",
  .default = dummy$sample_id_1
)
dummy$sample_id_2 <- case_when(
  grepl("pos", dummy$sample_id_2, ignore.case = TRUE) ~ "Mock",
  grepl("neg|n_", dummy$sample_id_2, ignore.case = TRUE) ~ "Neg.",
  .default = dummy$sample_id_2
)

# Sample type from the sample name; rows with no recognised type are dropped.
dummy$sample_type <- case_when(
  grepl("NS", dummy$iso1) ~ "Nasal",
  grepl("CFB", dummy$iso1) ~ "Sputum",
  grepl("BAL", dummy$iso1) ~ "BAL",
  grepl("pos|POS", dummy$iso1, ignore.case = TRUE) ~ "Mock",
  grepl("neg|N_EXT", dummy$iso1) ~ "Neg."
)
dummy <- subset(dummy, !is.na(sample_type))

# Append the baseline rows to the control-vs-treated distances.
path_horn_dist_long_within_sampleid_from_control <- bind_rows(
  path_horn_dist_long_within_sampleid_from_control,
  dummy
)
# Figure 3B: mean distance from the untreated sample, per sample type and
# treatment, with a t-based 95% confidence interval of the mean.
# Changes versus the previous version:
#   * the "QIAaamp" label typo is corrected to "QIAamp";
#   * across(sample_type, factor, levels = ...) is replaced by a direct
#     factor() call, since passing extra arguments through across() is
#     deprecated in dplyr 1.1;
#   * rows outside the four plotted sample types (including "Neg.") are
#     dropped with an explicit is.na() test; the old `!= "Neg."` comparison
#     only removed them because they had already become NA;
#   * summarise() returns an ungrouped result.
f3b2 <- path_horn_dist_long_within_sampleid_from_control %>%
  mutate(sample_type = factor(sample_type,
                              levels = c("Mock", "BAL", "Nasal", "Sputum"))) %>%
  subset(!is.na(sample_type)) %>%
  group_by(sample_type, treatment) %>%
  summarise(mean = mean(dist, na.rm = TRUE),
            sd = sd(dist, na.rm = TRUE),
            n = n(),
            .groups = "drop") %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         treatment = factor(treatment,
                            levels = c("Untreated", "lypma", "benzonase",
                                       "host_zero", "molysis", "qiaamp"),
                            labels = c("Untreated", "lyPMA", "Benzonase",
                                       "Host zero", "Molysis", "QIAamp"))) %>%
  ggplot(aes(x = mean, y = treatment, col = treatment)) +
  geom_point(aes(x = mean), shape = 15, size = 3) +
  geom_linerange(aes(xmin = lower.ci, xmax = upper.ci)) +
  facet_wrap(~sample_type, nrow = 4) +
  # Reverse the y axis so "Untreated" is drawn at the top of each facet.
  scale_y_discrete(limits = rev) +
  # Manual treatment colours (the same six hex codes are used for panel A).
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  xlab("Distance from untreated") +
  ylab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  theme(plot.tag = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "none") +
  labs(tag = "B") +
  # Dotted reference line at zero distance (identical to the untreated sample).
  geom_vline(xintercept = 0, col = "black", linetype = "dotted") +
  scale_x_continuous(breaks = c(-0.5, 0, 0.5, 1, 1.5),
                     labels = c(-0.5, "0 (low bias)", 0.5, 1, "1.5 (high bias)"))
# Figure 3: panel A (species richness) above panel B (distance from
# untreated), aligned on both axes, with one shared legend.
fig3 <- ggarrange(f3a, f3b2, ncol = 1, common.legend = TRUE, align = "hv")

# Export at 180 mm x 220 mm, 600 dpi. width/height are in millimetres because
# units = "mm".
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/Figure3.png",
    width = 180,
    height = 220,
    units = "mm",
    res = 600
)
# Explicit print(): the figure is drawn on the png device even when this code
# is run without top-level auto-printing (e.g. via source()).
print(fig3)
dev.off()
## quartz_off_screen
## 2
vii. & viii. Do these host depletion methods introduce bias in the sequenced community?
- Does bias differ by sample type?
Alpha diversity changes
Calculation for log centered Final reads
Distribution of centralized final reads
# Sample metadata with centred log10 sequencing-depth covariates.
# From here on `sample_data` is also a data object; calls written as
# sample_data(...) still resolve to the phyloseq accessor function.
sample_data <- sample_data(phyloseq$phyloseq_count)

# log10(Final_reads + 1) for every sample. The previous version used the
# natural log() although the histogram titles, table row labels and captions
# all say log10. Switching only rescales these covariates by a constant factor.
log10_final_reads <- log10(sample_data$Final_reads + 1)
is_untreated <- sample_data$treatment %in% c("Untreated")

# Median log10 depth of the untreated samples of one sample type; used as the
# centring constant.
untreated_baseline <- function(type) {
  median(log10_final_reads[is_untreated & sample_data$sample_type %in% type])
}

# log_centered_final_reads and bal_log_centered_final_reads are both centred
# on untreated BAL, as in the previous version.
sample_data$log_centered_final_reads <- log10_final_reads - untreated_baseline("BAL")
sample_data$bal_log_centered_final_reads <- log10_final_reads - untreated_baseline("BAL")
sample_data$ns_log_centered_final_reads <- log10_final_reads - untreated_baseline("Nasal")
sample_data$spt_log_centered_final_reads <- log10_final_reads - untreated_baseline("Sputum")

# Distribution of the BAL-centred covariate, overall and per sample type.
subset(sample_data, sample_data$sample_type %in% c("BAL", "Nasal", "Sputum")) %>%
  .$log_centered_final_reads %>%
  hist(main = "Histogram of centered log10 final reads of BAL, Nasal, Sputum")
subset(sample_data, sample_data$sample_type %in% c("BAL")) %>%
  .$log_centered_final_reads %>%
  hist(main = "Histogram of centered log10 final reads of BAL")
subset(sample_data, sample_data$sample_type %in% c("Nasal")) %>%
  .$log_centered_final_reads %>%
  hist(main = "Histogram of centered log10 final reads of Nasal")
subset(sample_data, sample_data$sample_type %in% c("Sputum")) %>%
  .$log_centered_final_reads %>%
  hist(main = "Histogram of centered log10 final reads of Sputum")
Species richness (all sample)
This table is too redundant. Removed from the manuscript.
Effect size, standard error (SE) and p-value of a statistical test for species richness with an interaction term using linear mixed effect model (Species richness ~ sample_type * treatment + log10 (Final_reads) + (1|subject_id) ).
Interaction term was highly significant (p = 2.2e-16)
# ANOVA table of the mixed model for species richness (treatment x sample type
# interaction, log10 sequencing depth, random intercept per subject), fitted to
# the samples with S.obs != 0.
anova(
  lmer(
    S.obs ~ treatment * sample_type + log10(Final_reads) + (1 | subject_id),
    data = subset(data.frame(sample_data), S.obs != 0)
  )
)
Table S4. Species richness (stratified) without depth
Table S4. Effect size, standard error (SE) and
p-value of a statistical test for species richness using linear mixed
effect model stratified by sample type (Species richness ~ treatment +
(1|subject_id) ). Stratified analyses were conducted for each sample
type as an interaction term of sample type and treatment was significant
at an ANOVA test (p-value < 0.001) using a model, lmer(species
richness ~ sample type + treatment + sample type * treatment +
(1|subject_id)). Statistical significances were noted with
*: p-value < 0.05, **: p-value < 0.01, and ***: p-value < 0.001.
# Mock community: linear model of species richness on treatment (lm(), no
# random effect; samples with S.obs == 0 excluded), formatted as effect size
# with a Wald-type 95% interval (estimate +/- 1.96 * SE), rounded p-value and
# significance stars.
sr_lmer_mock <- lm(S.obs ~ treatment,
                   data = subset(data.frame(sample_data),
                                 sample_type %in% c("Mock") & S.obs != 0)) %>%
  summary() %>%
  .[["coefficients"]] %>%
  data.frame(check.names = FALSE) %>%
  # Term names are edited as an ordinary column and restored as row names last.
  rownames_to_column(var = "x") %>%
  mutate(
    x = gsub("treatment|sample_type", "", x),
    x = gsub(":", " * ", x),
    # This model has no depth term, so the next replacement matches nothing.
    x = gsub("bal_log_centered_final_reads", "log<sub>10</sub>(Final reads)", x),
    # Stars are derived from the unrounded p-value.
    ` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                    abs(`Pr(>|t|)`) < 0.01 ~ "**",
                    abs(`Pr(>|t|)`) < 0.05 ~ "*",
                    .default = " "),
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 1), nsmall = 1),
      " (",
      format(round(Estimate - 1.96 * `Std. Error`, 1), nsmall = 1),
      ", ",
      format(round(Estimate + 1.96 * `Std. Error`, 1), nsmall = 1),
      ")"
    ),
    `<i>p</i>-value` = round(`Pr(>|t|)`, 3)
  ) %>%
  dplyr::select(c("x", "Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  column_to_rownames(var = "x")
# BAL: mixed model of species richness on treatment with a random intercept
# per subject (samples with S.obs == 0 excluded), formatted as effect size with
# a Wald-type 95% interval (estimate +/- 1.96 * SE), rounded p-value and
# significance stars.
sr_lmer_bal <- lmer(S.obs ~ treatment + (1 | subject_id),
                    data = subset(data.frame(sample_data),
                                  sample_type %in% c("BAL") & S.obs != 0)) %>%
  summary() %>%
  .[["coefficients"]] %>%
  data.frame(check.names = FALSE) %>%
  # Term names are edited as an ordinary column and restored as row names last.
  rownames_to_column(var = "x") %>%
  mutate(
    x = gsub("treatment|sample_type", "", x),
    x = gsub(":", " * ", x),
    # This model has no depth term, so the next replacement matches nothing.
    x = gsub("bal_log_centered_final_reads", "log<sub>10</sub>(Final reads)", x),
    # Stars are derived from the unrounded p-value.
    ` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                    abs(`Pr(>|t|)`) < 0.01 ~ "**",
                    abs(`Pr(>|t|)`) < 0.05 ~ "*",
                    .default = " "),
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 1), nsmall = 1),
      " (",
      format(round(Estimate - 1.96 * `Std. Error`, 1), nsmall = 1),
      ", ",
      format(round(Estimate + 1.96 * `Std. Error`, 1), nsmall = 1),
      ")"
    ),
    `<i>p</i>-value` = round(`Pr(>|t|)`, 3)
  ) %>%
  dplyr::select(c("x", "Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  column_to_rownames(var = "x")
# Nasal swabs: mixed model of species richness on treatment with a random
# intercept per subject, formatted as effect size with a Wald-type 95%
# interval (estimate +/- 1.96 * SE), rounded p-value and significance stars.
# Fix: the three format() calls were written as `format(nsmall = )` with an
# empty argument, so one decimal place was not enforced; they now use
# nsmall = 1 like the Mock, BAL and Sputum tables.
# NOTE(review): unlike the Mock and BAL models, this one does not exclude
# samples with S.obs == 0 -- confirm that is intended.
sr_lmer_ns <- lmer(S.obs ~ treatment + (1 | subject_id),
                   data = sample_data %>%
                     data.frame %>%
                     subset(., .$sample_type %in% c("Nasal"))) %>%
  summary() %>%
  .$coefficients %>%
  data.frame(check.names = FALSE) %>%
  # Stars are derived from the unrounded p-value.
  mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                         abs(`Pr(>|t|)`) < 0.01 ~ "**",
                         abs(`Pr(>|t|)`) < 0.05 ~ "*",
                         .default = " ")) %>%
  # Tidy the term names used as row labels.
  rownames_to_column(var = "x") %>%
  mutate(x = gsub("treatment|sample_type", "", x)) %>%
  mutate(x = gsub(":", " * ", x)) %>%
  # This model has no depth term, so the next replacement matches nothing.
  mutate(x = gsub("ns_log_centered_final_reads", "log<sub>10</sub>(Final reads)", x)) %>%
  column_to_rownames(var = "x") %>%
  rename("<i>p</i>-value" = "Pr(>|t|)",
         SE = "Std. Error") %>%
  mutate("Effect size (95% CI)" = paste0(
           format(round(Estimate, 1), nsmall = 1),
           " (",
           format(round(Estimate - 1.96 * SE, 1), nsmall = 1),
           ", ",
           format(round(Estimate + 1.96 * SE, 1), nsmall = 1),
           ")"
         ),
         "<i>p</i>-value" = round(`<i>p</i>-value`, 3)) %>%
  dplyr::select(c("Effect size (95% CI)", "<i>p</i>-value", " "))
# Sputum: mixed model of species richness on treatment with a random intercept
# per subject, formatted as effect size with a Wald-type 95% interval
# (estimate +/- 1.96 * SE), rounded p-value and significance stars.
sr_lmer_spt <- lmer(S.obs ~ treatment + (1 | subject_id),
                    data = subset(data.frame(sample_data),
                                  sample_type %in% c("Sputum"))) %>%
  summary() %>%
  .[["coefficients"]] %>%
  data.frame(check.names = FALSE) %>%
  # Term names are edited as an ordinary column and restored as row names last.
  rownames_to_column(var = "x") %>%
  mutate(
    x = gsub("treatment|sample_type", "", x),
    x = gsub(":", " * ", x),
    # This model has no depth term, so the next replacement matches nothing.
    x = gsub("spt_log_centered_final_reads", "log<sub>10</sub>(Final reads)", x),
    # Stars are derived from the unrounded p-value.
    ` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                    abs(`Pr(>|t|)`) < 0.01 ~ "**",
                    abs(`Pr(>|t|)`) < 0.05 ~ "*",
                    .default = " "),
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 1), nsmall = 1),
      " (",
      format(round(Estimate - 1.96 * `Std. Error`, 1), nsmall = 1),
      ", ",
      format(round(Estimate + 1.96 * `Std. Error`, 1), nsmall = 1),
      ")"
    ),
    `<i>p</i>-value` = round(`Pr(>|t|)`, 3)
  ) %>%
  dplyr::select(c("x", "Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  column_to_rownames(var = "x")
# Combine the four stratified species-richness tables side by side (three
# columns each) and render them as one HTML table with a spanning header per
# sample type. Escaping is disabled so the <i>p</i> markup in the column names
# is kept.
tables4 <- kable_styling(
  add_header_above(
    kbl(cbind(sr_lmer_mock, sr_lmer_bal, sr_lmer_ns, sr_lmer_spt),
        format = "html", escape = FALSE),
    c(" " = 1, "Mock" = 3, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)
  ),
  full_width = FALSE,
  html_font = "sans"
)
tables4
| Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | |||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 40.7 ( 35.7, 45.7) | 0.000 | *** | 5.0 ( -7.3, 17.2) | 0.438 | 10.1 ( 7.4, 12.8) | 0.000 | *** | 15.8 (-4.9, 36.5) | 0.168 | ||
| lyPMA | -5.5 (-12.9, 1.9) | 0.160 | 1.8 (-11.3, 14.8) | 0.796 | -4.5 (-9.1, 0.1) | 0.068 | 36.8 (17.6, 56.0) | 0.001 | ** | |||
| Benzonase | -16.5 (-23.9, -9.1) | 0.000 | *** | 5.2 ( -7.3, 17.7) | 0.425 | -0.1 (-4.7, 4.5) | 0.969 | 65.8 (46.6, 85.0) | 0.000 | *** | ||
| Host zero | -15.7 (-23.1, -8.3) | 0.000 | *** | 8.4 ( -4.1, 20.9) | 0.204 | 10.3 ( 5.7, 14.9) | 0.000 | *** | 101.8 (82.6, 121.0) | 0.000 | *** | |
| Molysis | -12.5 (-19.9, -5.1) | 0.003 | ** | 18.4 ( 5.9, 30.9) | 0.010 | ** | 6.3 ( 1.7, 10.9) | 0.013 | * | 111.4 (92.2, 130.6) | 0.000 | *** |
| QIAamp | -15.5 (-22.9, -8.1) | 0.000 | *** | 9.0 ( -3.5, 21.5) | 0.175 | 7.5 ( 2.9, 12.1) | 0.004 | ** | 84.2 (65.0, 103.4) | 0.000 | *** |
# Write the species-richness table to a self-contained HTML file.
# NOTE(review): the object is `tables4` (captioned "Table S4") but the file is
# named tableS3.html, while `tableS3` is written to tableS4.html earlier in the
# file -- confirm the two file names are not swapped.
save_kable(tables4, file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS3.html", self_contained = T)
Species richness - stratified - with depth
This table is too redundant and was removed from the manuscript.
Sequencing depth adjusted effect size, standard error (SE) and p-value of a statistical test for species richness using linear mixed effect model stratified by sample type (Species richness ~ treatment + log10 (Final_reads) + (1|subject_id) ).
# Depth-adjusted species richness, stratified by sample type.
#
# summarise_sr_lmer() fits
#   S.obs ~ treatment + <depth_var> + (1 | subject_id)
# on `data` and returns a display-ready data frame with three columns:
# "Effect size (95% CI)", "<i>p</i>-value" and " " (significance stars).
#
# data      : data frame holding S.obs, treatment, subject_id and `depth_var`.
# depth_var : name (string) of the centred log10 read-depth covariate; it is
#             relabelled "log<sub>10</sub>(Final reads)" in the row names.
#
# Assumes lmerTest's lmer() is the one on the search path: the `Pr(>|t|)`
# column used below is only present in that case.
# All three sample types share this one formatter so that the number
# formatting (nsmall = 1) cannot drift between them.
summarise_sr_lmer <- function(data, depth_var) {
  model_formula <- reformulate(
    c("treatment", depth_var, "(1 | subject_id)"),
    response = "S.obs"
  )
  fit <- lmer(model_formula, data = data)

  summary(fit)$coefficients %>%
    data.frame(check.names = FALSE) %>%
    # Stars are derived from the unrounded p-values.
    mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                           abs(`Pr(>|t|)`) < 0.01 ~ "**",
                           abs(`Pr(>|t|)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    rownames_to_column(var = "x") %>%
    mutate(
      x = gsub("treatment|sample_type", "", x),
      x = gsub(":", " * ", x),
      x = gsub(depth_var, "log<sub>10</sub>(Final reads)", x, fixed = TRUE)
    ) %>%
    column_to_rownames(var = "x") %>%
    rename(`<i>p</i>-value` = `Pr(>|t|)`,
           SE = `Std. Error`) %>%
    mutate(
      # Wald interval: estimate +/- 1.96 * SE, shown with one decimal.
      `Effect size (95% CI)` = paste0(
        format(round(Estimate, 1), nsmall = 1),
        " (",
        format(round(Estimate - 1.96 * SE, 1), nsmall = 1),
        ", ",
        format(round(Estimate + 1.96 * SE, 1), nsmall = 1),
        ")"
      ),
      `<i>p</i>-value` = round(`<i>p</i>-value`, 3)
    ) %>%
    dplyr::select(all_of(c("Effect size (95% CI)", "<i>p</i>-value", " ")))
}

# BAL additionally drops samples in which no species was observed.
sr_lmer_bal <- sample_data %>%
  data.frame() %>%
  subset(sample_type %in% c("BAL") & S.obs != 0) %>%
  summarise_sr_lmer("bal_log_centered_final_reads")

sr_lmer_ns <- sample_data %>%
  data.frame() %>%
  subset(sample_type %in% c("Nasal")) %>%
  summarise_sr_lmer("ns_log_centered_final_reads")

sr_lmer_spt <- sample_data %>%
  data.frame() %>%
  subset(sample_type %in% c("Sputum")) %>%
  summarise_sr_lmer("spt_log_centered_final_reads")

# Side-by-side table; three columns per sample type.
cbind(sr_lmer_bal, sr_lmer_ns, sr_lmer_spt) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
| Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | ||||
|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 6.2 ( -3.0, 15.4) | 0.212 | 10.7 ( 8.8, 12.7) | 0.000 | *** | 14.2 ( -5.2, 33.5) | 0.185 | ||
| lyPMA | -3.8 (-13.9, 6.3) | 0.473 | -0.9 (-4.2, 2.4) | 0.595 | 18.8 ( -3.9, 41.6) | 0.121 | |||
| Benzonase | -6.8 (-17.7, 4.1) | 0.239 | -1.0 (-4.1, 2.0) | 0.521 | 37.7 ( 8.9, 66.5) | 0.019 |
|
||
| Host zero | -5.5 (-16.9, 5.9) | 0.356 | 4.6 ( 1.0, 8.2) | 0.018 |
|
46.2 ( -2.5, 94.8) | 0.077 | ||
| Molysis | 3.3 ( -8.4, 15.0) | 0.585 | 4.9 ( 1.8, 8.0) | 0.005 | ** | 45.2 (-11.5, 101.9) | 0.134 | ||
| QIAamp | -6.2 (-18.0, 5.5) | 0.311 | 0.4 (-3.4, 4.3) | 0.831 | 37.1 ( -5.1, 79.3) | 0.100 | |||
| log10(Final reads) | 6.0 ( 3.2, 8.8) | 0.000 | *** | 2.9 ( 1.9, 3.9) | 0.000 | *** | 14.4 ( 2.7, 26.2) | 0.026 |
|
#save_kable(tableS7, file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS7.html", self_contained = T)
Beta diversity distances - Morisita Horn index
Table S5. PERMANOVA - all samples
Table S5. Degree of freedom, effect size (residual,
R2) and p-value of permutational ANOVA for Morisita-Horn index with an
interaction term and strata term (MH-index of species composition ~
sample type * treatment + subject + log10(final reads),
strata = subject id). Statistical significances were noted with
***: p-value < 0.001.
# PERMANOVA on Morisita-Horn distances.
#
# `by = "terms"` is passed explicitly: the tables built from these objects
# relabel one row per model term, and adonis2() only returns per-term rows
# when asked for a sequential (by-terms) test. Relying on the function's
# default makes the result depend on the installed vegan version.
#
# NOTE(review): the Table S5 caption mentions log10(final reads) and
# strata = subject id, but neither appears in this pooled model; confirm
# which of the two is intended.
horn_perm_all <- vegan::adonis2(
  distance(phyloseq_rel_nz, method = "horn") ~ sample_type * treatment + subject_id,
  data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = FALSE),
  permutations = 10000,
  by = "terms"
)

# Stratified models: one per sample type, each treatment entered as its own
# term, permutations restricted within subject via `strata`.
horn_perm_ns <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"), method = "horn") ~
    lypma + benzonase + host_zero + molysis + qiaamp,
  data = subset_samples(phyloseq_rel_nz, sample_type == "Nasal") %>%
    sample_data %>% data.frame(check.names = FALSE),
  strata = subset_samples(phyloseq_rel_nz, sample_type == "Nasal") %>%
    sample_data %>% data.frame(check.names = FALSE) %>% .$subject_id,
  permutations = 10000,
  by = "terms"
)

horn_perm_bal <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "BAL"), method = "horn") ~
    lypma + benzonase + host_zero + molysis + qiaamp,
  data = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>%
    sample_data %>% data.frame(check.names = FALSE),
  strata = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>%
    sample_data %>% data.frame(check.names = FALSE) %>% .$subject_id,
  permutations = 10000,
  by = "terms"
)

horn_perm_spt <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"), method = "horn") ~
    lypma + benzonase + host_zero + molysis + qiaamp,
  data = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>%
    sample_data %>% data.frame(check.names = FALSE),
  strata = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>%
    sample_data %>% data.frame(check.names = FALSE) %>% .$subject_id,
  permutations = 10000,
  by = "terms"
)
# Table S5: pooled PERMANOVA result formatted for display.
# Rows are relabelled with human-readable term names; a term without a label
# keeps its original name (an NA row name would make column_to_rownames()
# fail). Significance stars are derived from the unrounded p-values, after
# which every numeric column is rounded to three decimals.
tableS5 <- horn_perm_all %>%
  data.frame(check.names = FALSE) %>%
  rownames_to_column("row.names") %>%
  mutate(row.names = case_when(row.names == "sample_type" ~ "Sample type",
                               row.names == "treatment" ~ "Treatment",
                               row.names == "subject_id" ~ "Subject",
                               row.names == "log10(Final_reads)" ~ "log10(Final reads)",
                               row.names == "sample_type:treatment" ~ "Sample type * Treatment",
                               row.names == "Residual" ~ "Residual",
                               row.names == "Total" ~ "Total",
                               .default = row.names)) %>%
  column_to_rownames("row.names") %>%
  mutate(` ` = case_when(abs(`Pr(>F)`) < 0.001 ~ "***",
                         abs(`Pr(>F)`) < 0.01 ~ "**",
                         abs(`Pr(>F)`) < 0.05 ~ "*",
                         .default = " ")) %>%
  # where() + lambda replaces the deprecated across(is.numeric, round, digits = 3)
  mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
  rename(`<i>p</i>-value` = `Pr(>F)`,
         `R<sup>2</sup>` = R2,
         `Degree of freedom` = Df) %>%
  # format() turns the p-values into fixed three-decimal strings
  mutate(`<i>p</i>-value` = format(`<i>p</i>-value`, nsmall = 3)) %>%
  dplyr::select(all_of(c("Degree of freedom", "R<sup>2</sup>", "<i>p</i>-value", " "))) %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
tableS5
| Degree of freedom | R2 | p-value | ||
|---|---|---|---|---|
| Sample type | 3 | 0.528 | 0.000 | *** |
| Treatment | 5 | 0.031 | 0.000 | *** |
| Subject | 17 | 0.347 | 0.000 | *** |
| Sample type * Treatment | 15 | 0.052 | 0.000 | *** |
| Residual | 83 | 0.042 | NA | |
| Total | 123 | 1.000 | NA |
# Write the rendered table to a standalone HTML file.
# NOTE(review): absolute, user-specific path; this only runs on the author's machine.
save_kable(tableS5, file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS5.html", self_contained = TRUE)
PERMANOVA - Stratified
This table is too redundant and was removed from the manuscript.
Degree of freedom, effect size (residual, R^2) and p-value of permutational ANOVA for Morisita-Horn distances for species richness stratified by sample type (MH-distance ~ lyPMA + Benzonase + Host zero + Molysis + QIAamp, strata = subject_id).
# Stratified PERMANOVA tables (BAL / nasal swab / sputum).
#
# format_permanova_table() turns one adonis2() result into a display-ready
# data frame with columns "R<sup>2</sup>", "<i>p</i>-value" and " "
# (significance stars), using human-readable row names.
#
# perm_result : object returned by vegan::adonis2().
#
# A term without a label keeps its original name, so an unexpected row
# cannot become an NA row name (which column_to_rownames() rejects).
format_permanova_table <- function(perm_result) {
  term_labels <- c(
    lypma = "lyPMA",
    benzonase = "Benzonase",
    host_zero = "Host zero",
    molysis = "Molysis",
    qiaamp = "QIAamp",
    subject_id = "Subject id",
    "log10(Final_reads)" = "log10(Final reads)",
    Residual = "Residual",
    Total = "Total"
  )

  perm_result %>%
    data.frame(check.names = FALSE) %>%
    rownames_to_column("row.names") %>%
    mutate(row.names = coalesce(unname(term_labels[row.names]), row.names)) %>%
    column_to_rownames("row.names") %>%
    # Stars come from the unrounded p-values; rounding happens afterwards.
    mutate(` ` = case_when(abs(`Pr(>F)`) < 0.001 ~ "***",
                           abs(`Pr(>F)`) < 0.01 ~ "**",
                           abs(`Pr(>F)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    # where() + lambda replaces the deprecated across(is.numeric, round, digits = 3)
    mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
    rename(`<i>p</i>-value` = `Pr(>F)`,
           `R<sup>2</sup>` = R2,
           `Degree of freedom` = Df) %>%
    dplyr::select(all_of(c("R<sup>2</sup>", "<i>p</i>-value", " ")))
}

a <- format_permanova_table(horn_perm_bal)
b <- format_permanova_table(horn_perm_ns)
c <- format_permanova_table(horn_perm_spt)

tableS7 <- cbind(a, b, c) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
tableS7
| R2 | p-value | R2 | p-value | R2 | p-value | ||||
|---|---|---|---|---|---|---|---|---|---|
| lyPMA | 0.006 | 0.295 | 0.058 | 0.004 | ** | 0.011 | 0.349 | ||
| Benzonase | 0.001 | 0.940 | 0.031 | 0.261 | 0.003 | 0.674 | |||
| Host zero | 0.006 | 0.434 | 0.029 | 0.474 | 0.025 | 0.120 | |||
| Molysis | 0.007 | 0.407 | -0.005 | 0.977 | 0.055 | 0.024 |
|
||
| QIAamp | 0.014 | 0.040 |
|
0.053 | 0.032 |
|
0.144 | 0.000 | *** |
| Residual | 0.966 | NA | 0.835 | NA | 0.762 | NA | |||
| Total | 1.000 | NA | 1.000 | NA | 1.000 | NA |
Table S6. LM on M-H distance
Table S6. Effect size, standard error (SE) and
p-value of a statistical test for Morisita-Horn index from untreated to
each treated within subject, stratified by sample type. Stratified
analyses were conducted for each sample type as an interaction term of
sample type and treatment was significant (p-value < 0.001) using a
model, ANOVA(distance ~ sample type + treatment + sample type *
treatment). The test was conducted with a linear model LM(distance
within subject by treatment ~ treatment). The baseline of sample type is
untreated (0 distance, untreated from untreated), and statistical
significances were noted with
*: p-value < 0.05, **: p-value < 0.01 and ***:p-value
< 0.001.
# Fix the factor level order so that "Untreated" and "BAL" are the reference
# levels in the linear models fitted on this data frame.
path_horn_dist_long_within_sampleid_from_control[["treatment"]] <- factor(
  path_horn_dist_long_within_sampleid_from_control[["treatment"]],
  levels = c("Untreated", "lypma", "benzonase", "host_zero", "molysis", "qiaamp")
)
path_horn_dist_long_within_sampleid_from_control[["sample_type"]] <- factor(
  path_horn_dist_long_within_sampleid_from_control[["sample_type"]],
  levels = c("BAL", "Nasal", "Sputum")
)
Justifying stratified analysis - ANOVA(LM(dist ~ sample type * treatment))
# ANOVA table of the treatment x sample-type interaction model; a significant
# interaction is the stated justification for the stratified analysis below.
anova(
  lm(dist ~ treatment * sample_type,
     data = path_horn_dist_long_within_sampleid_from_control)
)
Stratified analysis
# Table S6: within-subject Morisita-Horn distance from the untreated sample,
# modelled per sample type as LM(dist ~ treatment).
#
# summarise_dist_lm() fits the model on one sample type and returns a
# display-ready data frame with columns "Effect size (95% CI)",
# "<i>p</i>-value" and " " (significance stars).
#
# sample_type_label : value of `sample_type` to keep ("BAL", "Nasal", "Sputum").
#
# Significance stars and confidence limits are computed from the UNROUNDED
# estimates; rounding is applied only to the displayed values. Rounding first
# would, for example, turn p = 0.0096 into 0.01 and downgrade "**" to "*".
summarise_dist_lm <- function(sample_type_label) {
  treatment_labels <- c(
    host_zero = "Host zero",
    benzonase = "Benzonase",
    lypma = "lyPMA",
    molysis = "Molysis",
    qiaamp = "QIAamp"
  )

  dist_data <- path_horn_dist_long_within_sampleid_from_control
  dist_data <- dist_data[dist_data$sample_type %in% sample_type_label, , drop = FALSE]

  coef_tbl <- lm(dist ~ treatment, data = dist_data) %>%
    summary() %>%
    .$coefficients %>%
    data.frame(check.names = FALSE) %>%
    rownames_to_column("row.names")

  # "treatmentlypma" -> "lyPMA", etc.
  for (raw_name in names(treatment_labels)) {
    coef_tbl$row.names <- gsub(raw_name, treatment_labels[[raw_name]],
                               coef_tbl$row.names, fixed = TRUE)
  }
  coef_tbl$row.names <- gsub("treatment|sample_type", "", coef_tbl$row.names)

  coef_tbl %>%
    column_to_rownames("row.names") %>%
    mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                           abs(`Pr(>|t|)`) < 0.01 ~ "**",
                           abs(`Pr(>|t|)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    rename(`<i>p</i>-value` = `Pr(>|t|)`,
           SE = `Std. Error`) %>%
    mutate(
      # Wald interval: estimate +/- 1.96 * SE, shown with two decimals.
      `Effect size (95% CI)` = paste0(
        format(round(Estimate, 2), nsmall = 2),
        " (",
        format(round(Estimate - 1.96 * SE, 2), nsmall = 2),
        ", ",
        format(round(Estimate + 1.96 * SE, 2), nsmall = 2),
        ")"
      ),
      `<i>p</i>-value` = round(`<i>p</i>-value`, 3)
    ) %>%
    dplyr::select(all_of(c("Effect size (95% CI)", "<i>p</i>-value", " ")))
}

a <- summarise_dist_lm("BAL")
b <- summarise_dist_lm("Nasal")
c <- summarise_dist_lm("Sputum")

tableS6 <- cbind(a, b, c) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
# NOTE(review): absolute, user-specific output path.
save_kable(tableS6, file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS6.html", self_contained = TRUE)
tableS6
| Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | ||||
|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 0.00 (-0.23, 0.23) | 1.000 | 0.00 (-0.08, 0.08) | 1.000 | 0.00 (-0.27, 0.27) | 1.000 | |||
| lyPMA | 0.32 ( 0.00, 0.65) | 0.064 | 0.17 ( 0.04, 0.31) | 0.016 |
|
0.35 (-0.04, 0.74) | 0.093 | ||
| Benzonase | 0.11 (-0.21, 0.43) | 0.525 | 0.17 ( 0.03, 0.31) | 0.019 |
|
0.52 ( 0.13, 0.91) | 0.015 |
|
|
| Host zero | 0.27 (-0.05, 0.60) | 0.114 | 0.06 (-0.08, 0.19) | 0.396 | 0.61 ( 0.22, 1.00) | 0.005 | ** | ||
| Molysis | 0.20 (-0.12, 0.53) | 0.231 | 0.22 ( 0.09, 0.36) | 0.003 | ** | 0.60 ( 0.21, 0.99) | 0.006 | ** | |
| QIAamp | 0.25 (-0.07, 0.57) | 0.145 | 0.12 (-0.01, 0.26) | 0.081 | 0.62 ( 0.23, 1.00) | 0.005 | ** |
Fig. S6 PCoA figure
Fig. S6. Principal coordinate analysis plot based on Morisita-Horn index of taxonomic sequencing results stratified by sample type.
# Fig. S6: PCoA on Morisita-Horn distances of the respiratory samples
# (negative controls and mock samples excluded), coloured by treatment and
# faceted by sample type with free axes.
figS6 <- ordinate(
  subset_samples(phyloseq_rel_nz, sample_type != "Neg." & sample_type != "Mock"),
  method = "PCoA",
  distance = "horn"
) %>%
  plot_ordination(phyloseq_rel_nz, ., color = "treatment") +
  #scale_color_viridis(discrete = 6, name = "Treatment", labels = c("Mock theoretical", "Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
  scale_color_manual(values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
                     name = "Treatment",
                     breaks = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
                     labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  #scale_shape(name = "Sample type", labels = c("Mock theoretical", "Mock")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "sans") +
  facet_wrap(~sample_type, scales = "free") +
  theme(plot.tag = element_text(size = 15), legend.position = "top")# +
#stat_ellipse(type = "norm") +
#stat_ellipse(type = "t")
figS6
# Export Fig. S6 as a 180 mm x 90 mm PNG at 600 dpi.
# NOTE(review): absolute, user-specific output path.
png(
  filename = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS6.png", # Output file
  width = 180, # Plot width, in the unit given by `units` (mm)
  height = 90, # Plot height, in the unit given by `units` (mm)
  units = "mm",
  res = 600
) #fixing multiple page issue
# Explicit print(): a ggplot object is only drawn when printed, and a bare
# `figS6` is not auto-printed when the script is run through source().
print(figS6)
dev.off()
## quartz_off_screen
## 2
iv. Mediation analysis
mediation analysis
Table 2. Mediation analysis (treatment-stratified)
outcome = S.obs; exposure = treatment (stratified); mediator = log10(Final_reads); mediator-outcome confounders = sample_type; exposure-mediator confounders = NA; outcome model = mixed-effects linear regression; mediator model = mixed-effects linear regression.
Mediation analysis was conducted stratified by treatment.
Table 2. Mediation analysis results: estimated effect sizes and p-values of the indirect effect, direct effect, total effect, and proportion mediated. The analysis employed treatment as exposure, log10(final reads) as mediator, species richness as outcome, and sample type as mediator-outcome confounder. The analysis was stratified by treatment, with each treatment coded as a binary variable (treated vs. untreated control).
## lypma
# only mediator-outcome confounders
# Detach (and try to unload) a package from the search path.
#
# pkg            : package name, given unquoted by default.
# character.only : set to TRUE when `pkg` is already a character string.
#
# Every "package:<pkg>" entry is removed, so a package that was attached more
# than once is detached completely. Nothing happens if it is not attached.
detach_package <- function(pkg, character.only = FALSE) {
  pkg_name <- if (character.only) pkg else deparse(substitute(pkg))
  search_entry <- paste0("package:", pkg_name)
  repeat {
    if (!(search_entry %in% search())) {
      break
    }
    detach(search_entry, unload = TRUE, character.only = TRUE)
  }
}
# Take lmerTest off the search path before the mediation models are fitted,
# so that lmer() below no longer resolves to lmerTest's version.
detach_package(lmerTest)
## all treatment groups
# Respiratory samples only. The filter is evaluated on the columns of the
# data frame being subset; filtering on `sample_data$sample_type` would read
# a different global object whose rows need not line up with this one.
sample_data_respiratory <- phyloseq_rel_nz %>%
  sample_data() %>%
  data.frame() %>%
  subset(sample_type %in% c("Sputum", "BAL", "Nasal"))

# Mediator model: sequencing depth as a function of lyPMA (treated vs. control).
med.fit <- lmer(log10(Final_reads) ~ lypma + (1|subject_id),
                data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$lypma == 1))
# Outcome model: richness on treatment, depth, their interaction and sample type.
out.fit <- lmer(S.obs ~ lypma * log10(Final_reads) + sample_type + (1|subject_id),
                data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$lypma == 1))
# NOTE(review): mediate() is simulation-based and no seed is set in this
# chunk, so the estimates change slightly between runs.
med.out <- mediate(model.m = med.fit,
                   model.y = out.fit,
                   treat = "lypma",
                   mediator = "log10(Final_reads)",
                   sims = 1000)
out.sum <- summary(med.out)
# One-row summary: averaged indirect/direct effects, total effect and
# proportion mediated, each with its p-value, rounded to three decimals.
final.out.lypma <- data.frame(`Indirect est.` = out.sum$d.avg,
                              `Indirect p-value` = out.sum$d.avg.p,
                              `Direct est.` = out.sum$z.avg,
                              `Direct p-value` = out.sum$z.avg.p,
                              `Total est.` = out.sum$tau.coef,
                              `Total p-value` = out.sum$tau.p,
                              `Proportion mediation est.` = out.sum$n.avg,
                              `Proportion mediation p-value` = out.sum$n.avg.p,
                              check.names = FALSE) %>%
  mutate(across(everything(), ~ round(.x, 3)))
## benzonase
# Mediator model: sequencing depth ~ benzonase (treated vs. untreated control).
med.fit <- lmer(
  log10(Final_reads) ~ benzonase + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$benzonase == 1)
)
# Outcome model: richness ~ treatment x depth, adjusted for sample type.
out.fit <- lmer(
  S.obs ~ benzonase * log10(Final_reads) + sample_type + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$benzonase == 1)
)
med.out <- mediate(
  model.m = med.fit,
  model.y = out.fit,
  treat = "benzonase",
  mediator = "log10(Final_reads)",
  sims = 1000
)
out.sum <- summary(med.out)
# One-row summary of the averaged effects, rounded to three decimals.
final.out.benzonase <- data.frame(
  `Indirect est.` = out.sum[["d.avg"]],
  `Indirect p-value` = out.sum[["d.avg.p"]],
  `Direct est.` = out.sum[["z.avg"]],
  `Direct p-value` = out.sum[["z.avg.p"]],
  `Total est.` = out.sum[["tau.coef"]],
  `Total p-value` = out.sum[["tau.p"]],
  `Proportion mediation est.` = out.sum[["n.avg"]],
  `Proportion mediation p-value` = out.sum[["n.avg.p"]],
  check.names = FALSE
) %>%
  mutate(across(everything(), ~ round(.x, 3)))
## host zero
# Mediator model: sequencing depth ~ Host zero (treated vs. untreated control).
med.fit <- lmer(
  log10(Final_reads) ~ host_zero + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$host_zero == 1)
)
# Outcome model: richness ~ treatment x depth, adjusted for sample type.
out.fit <- lmer(
  S.obs ~ host_zero * log10(Final_reads) + sample_type + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$host_zero == 1)
)
med.out <- mediate(
  model.m = med.fit,
  model.y = out.fit,
  treat = "host_zero",
  mediator = "log10(Final_reads)",
  sims = 1000
)
out.sum <- summary(med.out)
# One-row summary of the averaged effects, rounded to three decimals.
final.out.host_zero <- data.frame(
  `Indirect est.` = out.sum[["d.avg"]],
  `Indirect p-value` = out.sum[["d.avg.p"]],
  `Direct est.` = out.sum[["z.avg"]],
  `Direct p-value` = out.sum[["z.avg.p"]],
  `Total est.` = out.sum[["tau.coef"]],
  `Total p-value` = out.sum[["tau.p"]],
  `Proportion mediation est.` = out.sum[["n.avg"]],
  `Proportion mediation p-value` = out.sum[["n.avg.p"]],
  check.names = FALSE
) %>%
  mutate(across(everything(), ~ round(.x, 3)))
## molysis
# Mediator model: sequencing depth ~ MolYsis (treated vs. untreated control).
med.fit <- lmer(
  log10(Final_reads) ~ molysis + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$molysis == 1)
)
# Outcome model: richness ~ treatment x depth, adjusted for sample type.
out.fit <- lmer(
  S.obs ~ molysis * log10(Final_reads) + sample_type + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$molysis == 1)
)
med.out <- mediate(
  model.m = med.fit,
  model.y = out.fit,
  treat = "molysis",
  mediator = "log10(Final_reads)",
  sims = 1000
)
out.sum <- summary(med.out)
# One-row summary of the averaged effects, rounded to three decimals.
final.out.molysis <- data.frame(
  `Indirect est.` = out.sum[["d.avg"]],
  `Indirect p-value` = out.sum[["d.avg.p"]],
  `Direct est.` = out.sum[["z.avg"]],
  `Direct p-value` = out.sum[["z.avg.p"]],
  `Total est.` = out.sum[["tau.coef"]],
  `Total p-value` = out.sum[["tau.p"]],
  `Proportion mediation est.` = out.sum[["n.avg"]],
  `Proportion mediation p-value` = out.sum[["n.avg.p"]],
  check.names = FALSE
) %>%
  mutate(across(everything(), ~ round(.x, 3)))
## qiaamp
# Mediator model: sequencing depth ~ QIAamp (treated vs. untreated control).
med.fit <- lmer(
  log10(Final_reads) ~ qiaamp + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$qiaamp == 1)
)
# Outcome model: richness ~ treatment x depth, adjusted for sample type.
out.fit <- lmer(
  S.obs ~ qiaamp * log10(Final_reads) + sample_type + (1 | subject_id),
  data = subset(sample_data_respiratory, sample_data_respiratory$control == 1 | sample_data_respiratory$qiaamp == 1)
)
med.out <- mediate(
  model.m = med.fit,
  model.y = out.fit,
  treat = "qiaamp",
  mediator = "log10(Final_reads)",
  sims = 1000
)
out.sum <- summary(med.out)
# One-row summary of the averaged effects, rounded to three decimals.
final.out.qiaamp <- data.frame(
  `Indirect est.` = out.sum[["d.avg"]],
  `Indirect p-value` = out.sum[["d.avg.p"]],
  `Direct est.` = out.sum[["z.avg"]],
  `Direct p-value` = out.sum[["z.avg.p"]],
  `Total est.` = out.sum[["tau.coef"]],
  `Total p-value` = out.sum[["tau.p"]],
  `Proportion mediation est.` = out.sum[["n.avg"]],
  `Proportion mediation p-value` = out.sum[["n.avg.p"]],
  check.names = FALSE
) %>%
  mutate(across(everything(), ~ round(.x, 3)))
# Table 2: stack the five per-treatment mediation summaries (one row each),
# label the rows in the same order, render as HTML, save and display.
table2 <- rbind(final.out.lypma,
                final.out.benzonase,
                final.out.host_zero,
                final.out.molysis,
                final.out.qiaamp) %>%
  mutate(Treatment = c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"), .before = "Indirect est.") %>%
  kbl(format = "html") %>%
  kable_styling(full_width = FALSE, html_font = "sans")
# NOTE(review): absolute, user-specific output path.
save_kable(table2, file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/table2.html", self_contained = TRUE)
table2
| Treatment | Indirect est. | Indirect p-value | Direct est. | Direct p-value | Total est. | Total p-value | Proportion mediation est. | Proportion mediation p-value |
|---|---|---|---|---|---|---|---|---|
| lyPMA | -2.090 | 0.630 | 11.472 | 0.060 | 9.383 | 0.220 | -0.054 | 0.846 |
| Benzonase | 9.362 | 0.026 | 11.554 | 0.198 | 20.916 | 0.018 | 0.428 | 0.040 |
| Host zero | 24.563 | 0.004 | 11.271 | 0.366 | 35.834 | 0.000 | 0.674 | 0.004 |
| Molysis | 21.320 | 0.002 | 18.391 | 0.056 | 39.710 | 0.002 | 0.518 | 0.000 |
| QIAamp | 20.438 | 0.044 | 11.184 | 0.356 | 31.622 | 0.000 | 0.651 | 0.044 |
x. How were changes of individual taxa?
x. How were changes of individual taxa? Differential abundance analysis
1. Volcano plot with Mock, BAL, Nasal and Sputum (Figure S5)
a. Maaslin (feature ~ lyPMA + Benzonase + Host zero + Molysis + QIAamp, RE = subject_id)
i. make sure that you are doing this analysis accounting for the compositional nature of this data
b. Or Maaslin ( feature ~ sample type + treatment + sample type * treatment, RE = subject_id)
2. Balloon plots BAL, Nasal and Sputum. (Figure 4)
a. Add q-val, mean relative abundance, Gram stain, phylogenetic information
3. List of differentially abundant taxa by treatment method (Table S7)
a. Subset of low q-val (<0.1) & high fold-change (|est.| > 1)
DA taxa
Factors affecting DA taxa (main text)
# Load the pre-computed differential-abundance result tables from the
# project-relative "data" directory: pooled model, interaction model, and one
# table each for BAL, sputum, nasal swab and the "pos" sample set.
filt_maaslin_all <- read.csv(file.path("data", "filt_maaslin_all.csv"))
filt_maaslin_interaction <- read.csv(file.path("data", "filt_maaslin_interaction.csv"))
filt_fit_data_bal <- read.csv(file.path("data", "filt_fit_data_bal.csv"))
filt_fit_data_spt <- read.csv(file.path("data", "filt_fit_data_spt.csv"))
filt_fit_data_ns <- read.csv(file.path("data", "filt_fit_data_ns.csv"))
filt_fit_data_pos <- read.csv(file.path("data", "filt_fit_data_pos.csv"))
cat("Factors affecting DA taxa (q<0.1)")
## Factors affecting DA taxa (q<0.1)
# Count, per metadata variable, the pooled-model associations with
# q < 0.1 and |coef| > 1.
filt_maaslin_all %>%
  dplyr::filter(qval < 0.1, abs(coef) > 1) %>%
  dplyr::pull(metadata) %>%
  table()
## .
## benzonase host_zero lypma molysis qiaamp sample_type
## 4 6 3 15 4 131
cat("Number of positive reads")
## Number of positive reads
# Distinct features with at least one q < 0.1 association whose coefficient
# is positive.
filt_maaslin_all %>%
  dplyr::filter(qval < 0.1, coef > 0) %>%
  dplyr::pull(feature) %>%
  unique()
## [1] "Listeria_floridensis"
## [2] "Listeria_monocytogenes"
## [3] "Cryptococcus_neoformans"
## [4] "Saccharomyces_cerevisiae"
## [5] "Bacillus_intestinalis"
## [6] "Saccharomyces_kudriavzevii"
## [7] "Cryptococcus_gattii_VGIII"
## [8] "Cryptococcus_gattii_VGII"
## [9] "Brochothrix_campestris"
## [10] "Cryptococcus_gattii_VGI"
## [11] "Bacillus_ginsengihumi"
## [12] "S._cerevisiae.x.S._kudriavzevii"
## [13] "Listeria_innocua"
## [14] "Pseudomonas_aeruginosa"
## [15] "Acholeplasma_oculi"
## [16] "Salmonella_enterica"
## [17] "Cutibacterium_acnes"
## [18] "Corynebacterium_accolens"
## [19] "Actinomyces_oris"
## [20] "Granulicatella_elegans"
## [21] "Listeria_marthii"
## [22] "Rothia_dentocariosa"
## [23] "Streptococcus_infantis"
## [24] "Streptococcus_australis"
## [25] "Actinomyces_sp_ICM47"
## [26] "Cupriavidus_sp"
## [27] "Gemella_sanguinis"
## [28] "Solobacterium_moorei"
## [29] "Gemella_haemolysans"
## [30] "Streptococcus_parasanguinis"
## [31] "Anaerobiospirillum_thomasii"
## [32] "Streptococcus_sp_F0442"
## [33] "Enorma_massiliensis"
## [34] "Streptococcus_salivarius"
## [35] "Scardovia_wiggsiae"
## [36] "Staphylococcus_argenteus"
## [37] "Collinsella_aerofaciens"
## [38] "Olsenella_scatoligenes"
## [39] "Actinomyces_sp_oral_taxon_180"
## [40] "Actinomyces_sp_oral_taxon_181"
## [41] "Gemella_morbillorum"
## [42] "Limnochorda_pilosa"
## [43] "Collinsella_stercoris"
## [44] "Aeriscardovia_aeriphila"
## [45] "Streptococcus_oralis"
## [46] "Actinomyces_naeslundii"
## [47] "Staphylococcus_epidermidis"
## [48] "Streptococcus_sanguinis"
## [49] "Streptococcus_gordonii"
## [50] "Granulicatella_adiacens"
## [51] "Staphylococcus_schweitzeri"
## [52] "Mogibacterium_diversum"
## [53] "Actinomyces_graevenitzii"
## [54] "Collinsella_intestinalis"
## [55] "Slackia_isoflavoniconvertens"
## [56] "Staphylococcus_aureus"
## [57] "Gemella_bergeri"
## [58] "Pseudopropionibacterium_propionicum"
## [59] "X.Collinsella._massiliensis"
## [60] "Streptococcus_sp_HPH0090"
## [61] "Atopobium_rimae"
## [62] "Actinomyces_sp_oral_taxon_170"
## [63] "Mogibacterium_pumilum"
## [64] "Parvimonas_micra"
## [65] "Sutterella_parvirubra"
## [66] "Actinomyces_sp_HMSC035G02"
## [67] "Actinomyces_sp_S6_Spd3"
## [68] "Abiotrophia_defectiva"
## [69] "Streptococcus_peroris"
## [70] "Corynebacterium_atypicum"
## [71] "Streptococcus_vestibularis"
## [72] "Corynebacterium_durum"
## [73] "Lactobacillus_fermentum"
## [74] "Veillonella_dispar"
## [75] "Rothia_mucilaginosa"
## [76] "Bulleidia_extructa"
## [77] "Actinomyces_johnsonii"
## [78] "Streptococcus_anginosus"
## [79] "Paludisphaera_borealis"
## [80] "Streptococcus_pseudopneumoniae"
## [81] "Streptococcus_sp_A12"
## [82] "Actinomyces_sp_HPA0247"
## [83] "Olsenella_profusa"
## [84] "Eubacterium_infirmum"
## [85] "Fusobacterium_nucleatum"
## [86] "Slackia_exigua"
## [87] "Porphyromonas_somerae"
## [88] "Actinomyces_odontolyticus"
## [89] "Lachnoanaerobaculum_saburreum"
## [90] "Parvimonas_sp_oral_taxon_393"
## [91] "Abiotrophia_sp_HMSC24B09"
## [92] "Rothia_aeria"
## [93] "Actinomyces_meyeri"
## [94] "Actinomyces_viscosus"
## [95] "Kouleothrix_aurantiaca"
## [96] "Denitrobacterium_detoxificans"
## [97] "Alloscardovia_omnicolens"
## [98] "Streptococcus_sp_HMSC034E03"
## [99] "Hydrogenibacillus_schlegelii"
## [100] "Thermoleophilum_album"
## [101] "Oribacterium_sinus"
## [102] "Enterococcus_faecalis"
## [103] "Propionibacterium_humerusii"
## [104] "Olsenella_uli"
## [105] "Streptococcus_mutans"
## [106] "Eubacterium_brachy"
## [107] "Neisseria_flavescens"
## [108] "Atopobium_parvulum"
## [109] "Stenotrophomonas_maltophilia"
## [110] "Actinomyces_massiliensis"
## [111] "Streptococcus_sp_M334"
## [112] "Parvimonas_sp_oral_taxon_110"
## [113] "Cutibacterium_granulosum"
## [114] "Streptococcus_pneumoniae"
## [115] "Streptococcus_mitis"
## [116] "Veillonella_atypica"
## [117] "Veillonella_parvula"
## [118] "Streptococcus_cristatus"
## [119] "Thiohalorhabdus_denitrificans"
## [120] "Streptococcus_sp_HMSC067H01"
## [121] "Neisseria_subflava"
## [122] "Eubacterium_sulci"
## [123] "Streptococcus_sp_HMSC071D03"
## [124] "Gemmata_obscuriglobus"
## [125] "Veillonella_infantium"
## [126] "Dolosigranulum_pigrum"
## [127] "Propionibacterium_namnetense"
## [128] "Prevotella_histicola"
## [129] "Escherichia_coli"
## [130] "Candida_parapsilosis"
## [131] "Alkalilimnicola_ehrlichii"
## [132] "Peptostreptococcus_stomatis"
## [133] "Veillonella_sp_T11011_6"
## [134] "Malassezia_restricta"
## [135] "Stomatobaculum_longum"
## [136] "Corynebacterium_pseudogenitalium"
cat("Number of negative reads")
## Number of negative reads
# Distinct features with at least one q < 0.1 association whose coefficient
# is negative.
filt_maaslin_all %>%
  dplyr::filter(qval < 0.1, coef < 0) %>%
  dplyr::pull(feature) %>%
  unique()
## [1] "Pseudomonas_aeruginosa"
## [2] "Bacillus_intestinalis"
## [3] "Escherichia_coli"
## [4] "Salmonella_enterica"
## [5] "Cupriavidus_sp"
## [6] "Alkalilimnicola_ehrlichii"
## [7] "Thiohalorhabdus_denitrificans"
## [8] "Corynebacterium_accolens"
## [9] "Enterococcus_faecalis"
## [10] "Gemella_asaccharolytica"
## [11] "Hydrogenibacillus_schlegelii"
## [12] "Veillonella_atypica"
## [13] "Corynebacterium_pseudodiphtheriticum"
## [14] "Staphylococcus_epidermidis"
## [15] "Malassezia_restricta"
## [16] "Paludisphaera_borealis"
## [17] "Veillonella_dispar"
## [18] "Prevotella_melaninogenica"
cat("BAL highly changed taxa (q<0.1 & abs(.$coef) >1)")
## BAL highly changed taxa (q<0.1 & abs(.$coef) >1)
# BAL: number of q < 0.1, |coef| > 1 associations per treatment.
filt_fit_data_bal %>%
  dplyr::filter(qval < 0.1, abs(coef) > 1) %>%
  dplyr::pull(metadata) %>%
  table()
## < table of extent 0 >
cat("Sputum highly changed taxa (q<0.1 & abs(.$coef) >1)")
## Sputum highly changed taxa (q<0.1 & abs(.$coef) >1)
# Sputum: number of q < 0.1, |coef| > 1 associations per treatment.
filt_fit_data_spt %>%
  dplyr::filter(qval < 0.1, abs(coef) > 1) %>%
  dplyr::pull(metadata) %>%
  table()
## .
## benzonase host_zero lypma molysis qiaamp
## 61 85 37 98 75
cat("NS highly changed taxa (q<0.1 & abs(.$coef) >1)")
## NS highly changed taxa (q<0.1 & abs(.$coef) >1)
# Nasal swab: number of q < 0.1, |coef| > 1 associations per treatment.
filt_fit_data_ns %>%
  dplyr::filter(qval < 0.1, abs(coef) > 1) %>%
  dplyr::pull(metadata) %>%
  table()
## .
## host_zero lypma molysis qiaamp
## 1 5 1 2
Fig. S7. Volcano plot
Fig. S7. Volcano plot of differential abundance of microbes by each treatment with a model MaAsLin (relative abundance of each taxa ~ sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp, random effect = subject id).
#Making significance table for figure
# Define a function to make species names italicized
# Make a significance table for each figure (top 20 taxa)
# Format the taxon names stored in the row names of `data` as Markdown italics.
#
# Underscores become spaces, square brackets and the " group" suffix are
# removed, and the name is wrapped in `*...*`. For taxa resolved only to genus
# level ("Genus sp ..."), only the genus is italicised and the abbreviation is
# written as "sp.".
#
# @param data A data frame or matrix whose row names are taxon names.
# @return `data` with the reformatted row names.
species_italic <- function(data) {
  taxa <- gsub("_", " ", rownames(data))
  taxa <- gsub("[]]|[[]", "", taxa)
  # Treat "sp" as the abbreviation only when it is a whole word (optionally
  # already followed by a dot). A bare " sp" pattern would also match species
  # epithets such as "sputigena" and corrupt them.
  taxa <- gsub(" sp\\.?(?= |$)", "* sp.", taxa, perl = TRUE)
  taxa <- gsub(" group", "", taxa)
  # Names that already contain "*" (genus-only taxa) just need the opening
  # marker; all others are wrapped completely.
  taxa <- ifelse(grepl("[*]", taxa),
                 paste0("*", taxa),
                 paste0("*", taxa, "*"))
  rownames(data) <- taxa
  data
}
# Normalise a few taxon names in the `feature` column of `data`.
#
# @param data A data frame with a `feature` column of taxon names.
# @return `data` with the listed names replaced in `feature`.
species_revise <- function(data) {
  # pattern = replacement, applied in this order
  fixes <- c(
    "Saccharomyces_cerevisiae_x_Saccharomyces_kudriavzevii" = "S._cerevisiae x S._kudriavzevii",
    "Pseudomonas_aeruginosa_group" = "Pseudomonas_aeruginosa",
    "Pseudomonas_fluorescens_group" = "Pseudomonas_fluorescens"
  )
  for (pattern in names(fixes)) {
    data$feature <- gsub(pattern, fixes[[pattern]], data$feature)
  }
  data
}
# Build the q-value tables for the (up to) 20 features with the smallest
# q-value across the five depletion treatments.
#
# @param data MaAsLin results with at least `feature`, `metadata` and `qval`
#   columns; `metadata` must contain the levels lypma, benzonase, molysis,
#   host_zero and qiaamp.
# @return A list with
#   `data`        - wide table (feature + one q-value column per treatment),
#   `data_italic` - the same q-values with Markdown-italic taxon row names and
#                   display column names,
#   `data_sig`    - "*" where the q-value is < 0.1, NA otherwise.
make_sig_table <- function(data) {
  sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
  sig_data <- species_revise(sig_data)
  # Smallest q-value per feature across treatments. A feature with a missing
  # q-value for any treatment gets NA here and is therefore ordered last.
  sig_data$min <- apply(sig_data %>% dplyr::select(c("lypma", "benzonase", "molysis", "host_zero", "qiaamp")), 1, FUN = min)
  # head() instead of `[1:20, ]`: indexing past the last row would pad the
  # table with all-NA rows, which column_to_rownames() below cannot handle.
  sig_data <- sig_data[order(sig_data$min), ] %>%
    dplyr::select("feature", "lypma", "benzonase", "host_zero", "molysis", "qiaamp") %>%
    head(20)
  # Undo the make.names()-style mangling of "[Collinsella]".
  sig_data[["feature"]] <- ifelse(sig_data[["feature"]] == "X.Collinsella._massiliensis", "[Collinsella]_massiliensis", sig_data[["feature"]])
  sig_data_italic <- sig_data %>%
    rownames_to_column(var = "-") %>%
    column_to_rownames(var = "feature") %>%
    species_italic %>%
    dplyr::select(-c("-")) %>%
    rename(lyPMA = lypma, Benzonase = benzonase, `Host zero` = host_zero, Molysis = molysis, QIAamp = qiaamp)
  sig_data_sig <- ifelse(sig_data_italic < 0.1, "*", NA) %>% data.frame(check.names = FALSE)
  list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig)
}
# Replace each MaAsLin result table with the list returned by make_sig_table()
# (elements `data`, `data_italic`, `data_sig`). After these four lines the
# filt_fit_data_* objects are lists, so the lines cannot simply be re-run on
# their own output.
filt_fit_data_pos <- make_sig_table(filt_fit_data_pos)
filt_fit_data_bal <- make_sig_table(filt_fit_data_bal)
filt_fit_data_ns <- make_sig_table(filt_fit_data_ns)
filt_fit_data_spt <- make_sig_table(filt_fit_data_spt)
# For each sample type below:
#   1. keep the samples of that type and the taxa listed in `$data$feature`;
#   2. average every numeric column per treatment and keep columns 1:21
#      (`treatment` plus the next 20 columns);
#   3. transpose to taxa x treatment, italicise the taxon names, reorder the
#      rows to match `$data_italic`, turn zeros into NA and store the taxon
#      names in a `feature` column.
# NOTE(review): `.[, 1:21]` presumably relies on the 20 selected taxa being the
# first numeric columns after `treatment`; if fewer taxa match, numeric
# sample_data columns would be picked up instead - confirm.
# Mock community
filt_pos_sig <- subset_taxa(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "Mock"),
taxa_names(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "Mock")) %in% filt_fit_data_pos$data$feature)
filt_fit_data_pos$rel <- cbind(filt_pos_sig %>% otu_table %>% t, filt_pos_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% species_italic() %>% data.frame(check.names = F) %>%
.[row.names(filt_fit_data_pos$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
# Sputum
filt_spt_sig <- subset_taxa(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "Sputum"), taxa_names(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "Sputum")) %in% filt_fit_data_spt$data$feature)
filt_fit_data_spt$rel <- cbind(filt_spt_sig %>% otu_table %>% t, filt_spt_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% species_italic() %>% data.frame(check.names = F) %>%
.[row.names(filt_fit_data_spt$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
# Nasal swab
filt_ns_sig <- subset_taxa(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "Nasal"),
taxa_names(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "Nasal")) %in% filt_fit_data_ns$data$feature)
filt_fit_data_ns$rel <- cbind(filt_ns_sig %>% otu_table %>% t, filt_ns_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% species_italic() %>% data.frame(check.names = F) %>%
.[row.names(filt_fit_data_ns$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
# BAL; unlike the other sample types, rows whose `feature` contains "NA" are
# dropped at the end (presumably taxa from `$data_italic` that have no match
# in the abundance table - TODO confirm).
filt_bal_sig <- subset_taxa(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "BAL"),
taxa_names(subset_samples(phyloseq_unfiltered$phyloseq_rel, sample_type == "BAL")) %in% filt_fit_data_bal$data$feature)
filt_fit_data_bal$rel <- cbind(filt_bal_sig %>% otu_table %>% t, filt_bal_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% species_italic() %>% data.frame(check.names = F) %>%
.[row.names(filt_fit_data_bal$data_italic),] %>%
mutate_all(~na_if(., 0)) %>% rownames_to_column("feature") %>% subset(., !grepl("NA", .$feature))
#Volcano plot
# One point per feature/model-term association in `filt_maaslin_all`:
# x = MaAsLin coefficient, y = -log10(q-value), colour = model term.
figS7 <- ggplot(filt_maaslin_all, aes(y = -log10(qval), x = coef, col = metadata)) +
  theme_classic(base_family = "sans") +
  geom_point(size = 2, alpha = 0.5) +
  xlab("MaAsLin coefficient") +
  ylab("-log<sub>10</sub>(*q*-value)") +
  ylim(c(-1, 35)) +
  # Reference lines: q = 0.1 (i.e. -log10(q) = 1) and coefficient = 0.
  geom_hline(yintercept = 1, col = "gray") +
  geom_vline(xintercept = 0, col = "gray") +
  # NOTE(review): y = 80 lies outside ylim(c(-1, 35)), so this label is
  # presumably dropped from the rendered figure - confirm whether it should be
  # moved inside the limits or removed.
  annotate(family = "sans",
           geom = "richtext",
           x = 0, y = 80,
           label = "<i>q</i>-value = 0.1, fold-change = 0") +
  theme(legend.position = "top",
        axis.title.y = ggtext::element_markdown(),
        legend.text = element_markdown()) +
  # Colours follow https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_color_manual(
    values = c("#4daf4a", "grey", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628"),
    breaks = c("log10.Final_reads", "sample_type", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
    labels = c("log<sub>10</sub>(Final reads)", "Sample type", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ) +
  guides(col = guide_legend(title = "Factors", title.position = "top", nrow = 1))
figS7
# Export Fig. S7 as a 600 dpi PNG; width and height are in millimetres
# because `units = "mm"`.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS7.png", # Output file path
width = 180, # Width of the plot in mm
height = 90, # Height of the plot in mm
units = "mm",
res = 600
) # Open the PNG device
figS7
dev.off()
## quartz_off_screen
## 2
Fig. S8. Balloon plot
Fig. S8. Mean relative abundance of the top 20 significant taxa by q-value identified by differential abundance analysis using MaAsLin. Analyses were stratified by sample type: (A) bronchoalveolar lavage, (B) nasal swabs, and (C) sputum. Statistical significance was defined as q-value < 0.1.
# Balloon plot for the mock community: balloon size is the mean relative
# abundance per treatment (`value`), fill marks whether the MaAsLin q-value
# (`qval`) is below 0.1.
f5a <- merge(
  filt_fit_data_pos$rel %>%
    gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
  filt_fit_data_pos$data_italic %>%
    rownames_to_column("feature") %>%
    gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
  by = c("feature", "treatment"),
  all = TRUE
) %>%
  merge(
    filt_fit_data_pos$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  mutate(
    # Classify on the numeric q-value. The `sig` column merged in above holds
    # "*"/NA markers, so comparing it with 0.1 would be a string comparison.
    sig = case_when(!is.na(qval) & qval < 0.1 ~ "< 0.1",
                    .default = "> 0.1"),
    feature = case_when(
      feature == "*Saccharomyces cerevisiae x Saccharomyces kudriavzevii*" ~ "*Saccharomyces cerevisiae*",
      .default = feature
    )
  ) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  xlab("Experimental group") +
  ylab("Species") +
  labs(tag = "A") +
  ggtitle("Mock community") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 8),
        plot.margin = unit(c(0, 0.2, 0, 1), "lines")) +
  # Named values tie each colour to its category, independent of which
  # categories happen to be present in this panel.
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top"),
         size = guide_legend(title = "Relative abundance",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
#ffff33 qia
# Balloon plot for BAL (panel A of Fig. S8): balloon size is the mean relative
# abundance per treatment (`value`), fill marks whether the MaAsLin q-value
# (`qval`) is below 0.1.
f5b <- merge(
  filt_fit_data_bal$rel %>%
    gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
  filt_fit_data_bal$data_italic %>%
    rownames_to_column("feature") %>%
    gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
  by = c("feature", "treatment"),
  all = TRUE
) %>%
  merge(
    filt_fit_data_bal$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # Missing q-values (e.g. the untreated column) count as not significant.
  mutate(sig = case_when(!is.na(qval) & qval < 0.1 ~ "< 0.1",
                         .default = "> 0.1")) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  labs(tag = "A") +
  ggtitle("BAL") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 8),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        plot.margin = unit(c(0, 0.2, 0, 1), "lines")) +
  # Named values: with the previous unnamed c("grey", "red") the colours were
  # assigned by position, so significant cells would turn grey as soon as both
  # categories occur in this panel.
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top"),
         size = guide_legend(title = "Relative abundance",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
# Balloon plot for nasal swabs (panel B of Fig. S8): balloon size is the mean
# relative abundance per treatment (`value`), fill marks whether the MaAsLin
# q-value (`qval`) is below 0.1.
f5c <- merge(
  filt_fit_data_ns$rel %>%
    gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
  filt_fit_data_ns$data_italic %>%
    rownames_to_column("feature") %>%
    gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
  by = c("feature", "treatment"),
  all = TRUE
) %>%
  merge(
    filt_fit_data_ns$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # Classify on the numeric q-value. The `sig` column merged in above holds
  # "*"/NA markers, so comparing it with 0.1 would be a string comparison.
  mutate(sig = case_when(!is.na(qval) & qval < 0.1 ~ "< 0.1",
                         .default = "> 0.1")) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  xlab("Experimental group") +
  ylab("Species") +
  labs(tag = "B") +
  ggtitle("Nasal swab") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 8),
        axis.title.x = element_blank(),
        plot.margin = unit(c(0, 0.2, 0, 1), "lines")) +
  # Named values tie each colour to its category, independent of which
  # categories happen to be present in this panel.
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top",
                             override.aes = list(size = 5)),
         size = guide_legend(title = "Relative abundance",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
# Balloon plot for sputum (panel C of Fig. S8): balloon size is the mean
# relative abundance per treatment (`value`), fill marks whether the MaAsLin
# q-value (`qval`) is below 0.1.
f5d <- merge(
  filt_fit_data_spt$rel %>%
    gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
  filt_fit_data_spt$data_italic %>%
    rownames_to_column("feature") %>%
    gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
  by = c("feature", "treatment"),
  all = TRUE
) %>%
  merge(
    filt_fit_data_spt$data_sig %>%
      rownames_to_column("feature") %>%
      gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
    by = c("feature", "treatment"),
    all = TRUE
  ) %>%
  # Classify on the numeric q-value. The `sig` column merged in above holds
  # "*"/NA markers, so comparing it with 0.1 would be a string comparison.
  mutate(sig = case_when(!is.na(qval) & qval < 0.1 ~ "< 0.1",
                         .default = "> 0.1")) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  xlab("Experimental group") +
  labs(tag = "C") +
  ggtitle("Sputum") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 8),
        axis.title.x = element_text(margin = margin(t = 0)),
        axis.title.y = element_blank(),
        plot.margin = unit(c(0, 0.2, 0, 1), "lines")) +
  # Named values tie each colour to its category, independent of which
  # categories happen to be present in this panel.
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top"),
         size = guide_legend(title = "Relative abundance",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
# Stack the legend extracted from the nasal panel above the BAL, nasal and
# sputum balloon plots; the panels' own legends are suppressed.
fig4 <- ggarrange(
  as_ggplot(lemon::g_legend(f5c)),
  f5b,
  f5c,
  f5d,
  ncol = 1,
  heights = c(1.5, 4, 4, 4),
  legend = "none",
  align = "hv"
)
fig4
# Export the balloon-plot figure (Fig. S8) as a 600 dpi PNG; width and height
# are in millimetres because `units = "mm"`.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS8.png", # Output file path
width = 180, # Width of the plot in mm
height = 220, # Height of the plot in mm
units = "mm",
res = 600
) # Open the PNG device
fig4
# alpha diversity plots
#ggarrange(f4ad, ggarrange(f4e, f4f, ncol = 2),
# ncol = 1) # alpha diversity plots
dev.off()
## quartz_off_screen
## 2
Table S7. List of DA taxa
Table S7. List of differentially abundant taxa identified by MaAsLin (Beghini et al., 2021). Significant associations (q-value < 0.1) showing a large change (|log2(fold-change)| > 1) are listed.
# Table S7: treatment associations from `filt_maaslin_all` with q < 0.1 and
# |coefficient| > 1, rendered as an HTML table with italic taxon names.
tableS7 <- filt_maaslin_all %>%
  subset(., .$qval < 0.1 & abs(.$coef) > 1) %>%
  # Keep the depletion treatments only.
  subset(., .$metadata != "sample_type" & .$metadata != "log10.Final_reads") %>%
  dplyr::select(c("feature", "metadata", "coef", "qval")) %>%
  mutate(
    # Undo the make.names()-style mangling of "[Collinsella]", as
    # make_sig_table() does for the figures.
    feature = gsub("X.Collinsella.", "[Collinsella]", feature, fixed = TRUE),
    feature = paste0("<i>", gsub("_", " ", feature)),
    # Close the italics before a stand-alone "sp" abbreviation; requiring a
    # following space or end of string keeps epithets such as "sputigena"
    # intact.
    feature = gsub(" sp\\.?(?= |$)", "</i> sp.", feature, perl = TRUE),
    feature = gsub(" group", "", feature),
    # Names without "sp." are italicised completely.
    feature = case_when(!grepl("</i>", feature) ~ paste0(feature, "</i>"),
                        .default = feature)
  ) %>%
  rename(Taxa = "feature",
         Treatment = "metadata",
         "log<sub>2</sub>(fold-change)" = "coef",
         `<i>q</i>-value` = "qval") %>%
  mutate(Treatment = case_when(Treatment == "lypma" ~ "lyPMA",
                               Treatment == "host_zero" ~ "Host zero",
                               Treatment == "benzonase" ~ "Benzonase",
                               Treatment == "molysis" ~ "Molysis",
                               Treatment == "qiaamp" ~ "QIAamp",
                               .default = Treatment),
         "log<sub>2</sub>(fold-change)" = round(`log<sub>2</sub>(fold-change)`, 3),
         `<i>q</i>-value` = round(`<i>q</i>-value`, 3)) %>%
  remove_rownames() %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
tableS7
| Taxa | Treatment | log2(fold-change) | q-value |
|---|---|---|---|
| Cupriavidus sp. | lyPMA | 1.610 | 0.000 |
| Gemella haemolysans | Molysis | 1.300 | 0.000 |
| Pseudomonas aeruginosa | Host zero | -1.798 | 0.000 |
| Pseudomonas aeruginosa | Molysis | -1.803 | 0.000 |
| Gemella haemolysans | QIAamp | 1.185 | 0.000 |
| Escherichia coli | Host zero | -1.933 | 0.000 |
| Streptococcus oralis | Molysis | 1.618 | 0.000 |
| Olsenella scatoligenes | Molysis | 1.039 | 0.000 |
| Gemella haemolysans | Host zero | 1.056 | 0.000 |
| Escherichia coli | Benzonase | -1.719 | 0.000 |
| Salmonella enterica | Host zero | -1.001 | 0.000 |
| Actinomyces sp. oral taxon 181 | Molysis | 1.109 | 0.000 |
| X.Collinsella. massiliensis | QIAamp | 1.081 | 0.000 |
| Gemella haemolysans | Benzonase | 1.006 | 0.000 |
| Actinomyces oris | Molysis | 1.313 | 0.001 |
| Pseudomonas aeruginosa | QIAamp | -1.407 | 0.001 |
| Granulicatella elegans | Benzonase | 1.112 | 0.001 |
| Sutterella parvirubra | lyPMA | 1.622 | 0.001 |
| X.Collinsella. massiliensis | Molysis | 1.028 | 0.001 |
| Actinomyces naeslundii | Molysis | 1.115 | 0.001 |
| Gemella morbillorum | Molysis | 1.024 | 0.001 |
| Granulicatella elegans | Molysis | 1.059 | 0.001 |
| Rothia mucilaginosa | Molysis | 1.050 | 0.002 |
| Cupriavidus sp. | Molysis | 1.057 | 0.002 |
| Cutibacterium acnes | lyPMA | 1.260 | 0.002 |
| Cupriavidus sp. | Host zero | 1.035 | 0.002 |
| Pseudomonas aeruginosa | Benzonase | -1.247 | 0.003 |
| Alkalilimnicola ehrlichii | Molysis | -1.033 | 0.006 |
| Thiohalorhabdus denitrificans | Molysis | -1.094 | 0.006 |
| Escherichia coli | QIAamp | -1.062 | 0.020 |
| Escherichia coli | Molysis | -1.062 | 0.020 |
| Sutterella parvirubra | Host zero | 1.024 | 0.032 |
# Write Table S7 to a self-contained HTML file.
save_kable(
  tableS7,
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS7.html",
  self_contained = TRUE
)
3.4. Effect of treatments on functional analysis results
xi. Did depletion methods change diversity, by sample type?
Function diversity
Fig. S9. Figure of alpha and beta
Fig. S9. Alpha and beta diversity of predicted functions by sample type and treatment method. (A) Function richness with statistical test results (linear mixed effect model stratified by sample type). (B) Morisita-Horn distance within subject between treated and untreated samples; squares represent mean values and bars represent 95% confidence intervals.
# Sample metadata of the pathway table as a plain data frame, without the rows
# whose `simpson` value is NaN. The variable has the same name as the phyloseq
# accessor sample_data(); calls such as sample_data(x) below still reach the
# function. No later statement in this section reads this variable before it
# is reassigned.
sample_data <- sample_data(phyloseq$phyloseq_path_rpk) %>% data.frame(check.names = F) %>% subset(., !is.nan(.$simpson))
# Pathway table restricted to BAL, nasal, sputum and mock samples with a
# non-zero S.obs.
phyloseq_rel_nz <- subset_samples(phyloseq$phyloseq_path_rpk, S.obs != 0 & sample_type %in% c("BAL", "Nasal", "Sputum", "Mock"))
# Extra metadata columns: log10 of Final_reads and a combined
# "sample_type:treatment" label.
sample_data(phyloseq_rel_nz)$log10.Final_reads <- log10(sample_data(phyloseq_rel_nz)$Final_reads)
sample_data(phyloseq_rel_nz)$sampletype_treatment <- paste(sample_data(phyloseq_rel_nz)$sample_type, sample_data(phyloseq_rel_nz)$treatment, sep = ":")
# Panel A: function richness (S.obs) per treatment, one facet per sample type.
# Points are individual samples; the point range is the mean +/- 1 SD.
f4a <- ggplot(
  subset(sample_data(phyloseq$phyloseq_path_rpk) %>% data.frame,
         sample_data(phyloseq$phyloseq_path_rpk)$sample_type %in% c("Sputum", "Nasal", "BAL", "Mock")),
  aes(x = treatment, y = S.obs)
) +
  geom_jitter(aes(color = treatment), position = position_jitter(0.2), size = 1.2) +
  stat_summary(aes(color = treatment),
               fun.data = "mean_sdl", fun.args = list(mult = 1),
               geom = "pointrange", size = 0.4) +
  ylab("Function richness") +
  xlab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  # Legend label corrected from "QIAaamp" to "QIAamp".
  scale_color_manual(values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
                     name = "Treatment",
                     labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
  labs(tag = "A") +
  theme(plot.tag = element_text(size = 15),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()) +
  facet_wrap(~sample_type, nrow = 1) +
  guides(col = guide_legend(nrow = 1))
# Significance labels drawn above each treatment in every facet of panel A,
# together with the y position (S.obs) at which each label is placed.
dat_text <- data.frame(
  label = c(
    "**", "***", "***", "", "***",   # Mock
    "", "**", "***", "***", "**",    # BAL
    "", "", "***", "**", "***",      # Nasal
    "**", "***", "***", "***", "***" # Sputum
  ),
  sample_type = factor(
    rep(c("Mock", "BAL", "Nasal", "Sputum"), each = 5),
    levels = c("Mock", "BAL", "Nasal", "Sputum")
  ),
  treatment = factor(
    rep(c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"), times = 4),
    levels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")
  ),
  S.obs = c(
    420, 400, 330, 410, 400, # Mock
    250, 230, 300, 320, 300, # BAL
    210, 220, 230, 240, 250, # Nasal
    330, 350, 370, 350, 360  # Sputum
  )
)
# Overlay the significance labels from `dat_text` on panel A.
f4a <- f4a +
  geom_text(aes(x = treatment, y = S.obs, label = label), data = dat_text)
#f3a <- f3a + geom_text(
# data = dat_text,
# mapping = aes(x = treatment, y = S.obs, label = label)
#)
# Beta diversity: pairwise Morisita-Horn distances between all samples in
# long format (one row per sample pair: iso1, iso2, dist).
horn_dist_long <- melt_dist(as.matrix(distance(phyloseq_rel_nz, method = "horn")))
# Split both sample names on "_" (at most three pieces); the first two pieces
# form the sample id.
# (This could also be done by merging the metadata into `horn_dist_long`.)
names <- data.frame(str_split_fixed(horn_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(horn_dist_long$iso2, "_", 3))
horn_dist_long <- horn_dist_long %>%
  mutate(
    sample_id_1 = paste(names$X1, names$X2, sep = "_"),
    # Depletion method encoded in the sample name; "control" if none matches.
    method_1 = case_when(grepl("lyPMA", iso1) ~ "lypma",
                         grepl("ben", iso1) ~ "benzonase",
                         grepl("host", iso1) ~ "host_zero",
                         grepl("qia", iso1) ~ "qiaamp",
                         grepl("moly", iso1) ~ "molysis",
                         .default = "control"),
    sample_id_2 = paste(names2$X1, names2$X2, sep = "_"),
    method_2 = case_when(grepl("lyPMA", iso2) ~ "lypma",
                         grepl("ben", iso2) ~ "benzonase",
                         grepl("host", iso2) ~ "host_zero",
                         grepl("qia", iso2) ~ "qiaamp",
                         grepl("moly", iso2) ~ "molysis",
                         .default = "control"),
    # Collapse all positive controls to "Mock" and all negative controls to
    # "Neg."; every other sample keeps its own id.
    sample_id_1 = case_when(grepl("pos", sample_id_1, ignore.case = TRUE) ~ "Mock",
                            grepl("neg|n_", sample_id_1, ignore.case = TRUE) ~ "Neg.",
                            .default = sample_id_1),
    sample_id_2 = case_when(grepl("pos", sample_id_2, ignore.case = TRUE) ~ "Mock",
                            grepl("neg|n_", sample_id_2, ignore.case = TRUE) ~ "Neg.",
                            .default = sample_id_2)
  )
# Distances between each treated sample and the untreated ("control") sample
# with the same sample id.
path_horn_dist_long_within_sampleid_from_control <- horn_dist_long %>%
  subset(sample_id_1 == sample_id_2) %>%                        # same sample id
  subset(method_1 != method_2) %>%                              # different methods
  subset(method_1 == "control" | method_2 == "control") %>%     # one side is the control
  mutate(
    # The treatment is whichever side of the pair is not the control.
    treatment = ifelse(method_1 == "control", method_2, method_1),
    # Sample type derived from the first sample name of the pair.
    sample_type = case_when(grepl("NS", iso1) ~ "Nasal",
                            grepl("CFB", iso1) ~ "Sputum",
                            grepl("BAL", iso1) ~ "BAL",
                            grepl("pos|POS", iso1, ignore.case = TRUE) ~ "Mock",
                            grepl("neg|N_EXT", iso1) ~ "Neg.",
                            .default = NA_character_),
    # Name of the control sample the distance is measured from.
    dist_from = case_when(method_1 == "control" ~ iso1,
                          method_2 == "control" ~ iso2)
  )
# Baseline rows: every control sample gets a distance of 0 to itself, labelled
# "Untreated", so the untreated group appears in the plot.
dummy <- data.frame(
  iso1 = unique(path_horn_dist_long_within_sampleid_from_control$dist_from),
  iso2 = unique(path_horn_dist_long_within_sampleid_from_control$dist_from),
  dist = 0,
  treatment = "Untreated",
  method_1 = "control",
  method_2 = "control"
)
# Same sample-id / sample-type derivation as for the real distances.
names <- data.frame(str_split_fixed(dummy$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(dummy$iso2, "_", 3))
dummy <- dummy %>%
  mutate(
    sample_id_1 = paste(names$X1, names$X2, sep = "_"),
    sample_id_2 = paste(names2$X1, names2$X2, sep = "_"),
    sample_id_1 = case_when(grepl("pos", sample_id_1, ignore.case = TRUE) ~ "Mock",
                            grepl("neg|n_", sample_id_1, ignore.case = TRUE) ~ "Neg.",
                            .default = sample_id_1),
    sample_id_2 = case_when(grepl("pos", sample_id_2, ignore.case = TRUE) ~ "Mock",
                            grepl("neg|n_", sample_id_2, ignore.case = TRUE) ~ "Neg.",
                            .default = sample_id_2),
    sample_type = case_when(grepl("NS", iso1) ~ "Nasal",
                            grepl("CFB", iso1) ~ "Sputum",
                            grepl("BAL", iso1) ~ "BAL",
                            grepl("pos|POS", iso1, ignore.case = TRUE) ~ "Mock",
                            grepl("neg|N_EXT", iso1) ~ "Neg.",
                            .default = NA_character_)
  ) %>%
  # Drop rows whose sample type could not be determined.
  subset(!is.na(sample_type))
path_horn_dist_long_within_sampleid_from_control <- bind_rows(path_horn_dist_long_within_sampleid_from_control, dummy)
# Panel B: mean Morisita-Horn distance from the untreated sample per treatment
# and sample type, with a t-based 95% confidence interval.
f4b2 <- path_horn_dist_long_within_sampleid_from_control %>%
  # Sample types outside these four levels (e.g. "Neg.") become NA ...
  mutate(sample_type = factor(sample_type, levels = c("Mock", "BAL", "Nasal", "Sputum"))) %>%
  # ... and are dropped here.
  subset(!is.na(sample_type)) %>%
  group_by(sample_type, treatment) %>%
  summarise(mean = mean(dist, na.rm = TRUE),
            sd = sd(dist, na.rm = TRUE),
            n = n(),
            .groups = "drop_last") %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         # Display label corrected from "QIAaamp" to "QIAamp".
         treatment = factor(treatment,
                            levels = c("Untreated", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
                            labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"))) %>%
  ggplot(aes(x = mean, y = treatment, col = treatment)) +
  geom_point(aes(x = mean), shape = 15, size = 3) +
  geom_linerange(aes(xmin = lower.ci, xmax = upper.ci)) +
  facet_wrap(~sample_type, nrow = 4) +
  scale_y_discrete(limits = rev) +
  # Colours follow https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  scale_color_manual(values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4", "#a6cee3"),
                     name = "Treatment",
                     labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) +
  xlab("Distance from untreated") +
  ylab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  theme(plot.tag = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "none") +
  labs(tag = "B") +
  geom_vline(xintercept = 0, col = "black", linetype = "dotted") +
  scale_x_continuous(breaks = c(0, 1), labels = c("Lower bias", "Higher bias"))
# Fig. S9: function richness (f4a) stacked above the distance-from-untreated
# panel (f4b2) with a shared legend. The object keeps its existing name
# `figS8`.
figS8 <- ggarrange(f4a, f4b2, ncol = 1, common.legend = TRUE, align = "hv") +
  guides(fill = guide_legend(nrow = 1))
figS8
# Export as a 600 dpi PNG; width and height are in millimetres. The file is
# FigureS9.png: writing to FigureS8.png would overwrite the balloon plot
# (Fig. S8) exported earlier.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS9.png",
    width = 180,
    height = 170,
    units = "mm",
    res = 600)
figS8
dev.off()
## quartz_off_screen
## 2
Function richness - LMER all samples
The effects of some treatments were offset by the interaction terms; the associations were therefore sample-type specific.
Effect size (95% confidence interval computed as estimate ± 1.96 × standard error, SE) and p-value of a statistical test for function richness with an interaction term using a linear mixed-effect model (function richness ~ sample type * treatment + (1|subject_id)).
library(lmerTest)

# Sample metadata of the pathway-level (RPK) phyloseq object.
sample_data <- sample_data(phyloseq$phyloseq_path_rpk)

# log(Final_reads + 1), centred on the median of the untreated BAL samples.
sample_data$log_centered_final_reads <-
  log(sample_data$Final_reads + 1) -
  median(
    log(
      subset(
        sample_data,
        sample_data$sample_type %in% "BAL" & sample_data$treatment %in% "Untreated"
      )$Final_reads + 1
    )
  )

# Mixed model of S.obs with a sample type x treatment interaction and a random
# intercept per subject, fitted on BAL, nasal and sputum samples, then rendered
# as an HTML table of effect sizes (estimate +/- 1.96 * SE) and p-values.
lmer(
  S.obs ~ sample_type * treatment + (1 | subject_id),
  data = subset(data.frame(sample_data), sample_type %in% c("BAL", "Nasal", "Sputum"))
) %>%
  summary() %>%
  .$coefficients %>%
  data.frame(check.names = FALSE) %>%
  # Significance stars are derived from the unrounded p-values.
  mutate(
    ` ` = case_when(
      abs(`Pr(>|t|)`) < 0.001 ~ "***",
      abs(`Pr(>|t|)`) < 0.01 ~ "**",
      abs(`Pr(>|t|)`) < 0.05 ~ "*",
      .default = " "
    )
  ) %>%
  # Strip the variable-name prefixes from the term labels and show
  # interactions as "A * B".
  rownames_to_column(var = "x") %>%
  mutate(x = gsub(":", " * ", gsub("sample_type|treatment", "", x))) %>%
  column_to_rownames(var = "x") %>%
  rename(`<i>p</i>-value` = `Pr(>|t|)`, SE = `Std. Error`) %>%
  mutate(
    `Effect size (95% CI)` = paste0(
      format(round(Estimate, 1), nsmall = 1),
      " (",
      format(round(Estimate - 1.96 * SE, 1), nsmall = 1),
      ", ",
      format(round(Estimate + 1.96 * SE, 1), nsmall = 1),
      ")"
    ),
    `<i>p</i>-value` = round(`<i>p</i>-value`, 3)
  ) %>%
  dplyr::select(c("Effect size (95% CI)", "<i>p</i>-value", " ")) %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
| Effect size (95% CI) | p-value | ||
|---|---|---|---|
| (Intercept) | 21.6 ( -25.2, 68.4) | 0.370 | |
| Nasal | 115.5 ( 58.1, 172.9) | 0.000 | *** |
| Sputum | 148.2 ( 82.0, 214.4) | 0.000 | *** |
| lyPMA | 58.6 ( 7.3, 109.9) | 0.029 | * |
| Benzonase | 126.4 ( 75.1, 177.7) | 0.000 | *** |
| Host zero | 164.6 ( 113.3, 215.9) | 0.000 | *** |
| Molysis | 190.0 ( 138.7, 241.3) | 0.000 | *** |
| QIAamp | 130.2 ( 78.9, 181.5) | 0.000 | *** |
| Nasal * lyPMA | -41.6 (-110.9, 27.8) | 0.244 | |
| Sputum * lyPMA | 21.2 ( -51.4, 93.8) | 0.569 | |
| Nasal * Benzonase | -110.9 (-180.3, -41.4) | 0.003 | ** |
| Sputum * Benzonase | -40.6 (-113.2, 32.0) | 0.277 | |
| Nasal * Host zero | -105.3 (-174.7, -35.8) | 0.004 | ** |
| Sputum * Host zero | -27.6 (-100.2, 45.0) | 0.459 | |
| Nasal * Molysis | -144.4 (-213.8, -75.1) | 0.000 | *** |
| Sputum * Molysis | -48.2 (-120.8, 24.4) | 0.198 | |
| Nasal * QIAamp | -57.5 (-127.0, 11.9) | 0.109 | |
| Sputum * QIAamp | -14.6 ( -87.2, 58.0) | 0.695 |
Table S8. Function richness - all & stratified
Table S8. Effect size, standard error (SE) and p-value of a statistical test for function richness with an interaction term using linear mixed effect model (function richness ~ treatment + (1|subject_id)). Stratified analyses were conducted for each sample type as an interaction term of sample type and treatment was significant (p-value < 0.001) at ANOVA of LMER(function richness ~ sample type + treatment + sample type * treatment + (1|subject_id)). The baseline of categorical variables is untreated group. Statistical significances were noted with *: p-value < 0.05, **: p-value < 0.01, and ***: p-value < 0.001.
Association was not adjusted with sequencing depth.
# Turn a fitted model (lm or lmerTest lmer) into a display table with one row
# per coefficient and three columns: "Effect size (95% CI)" (estimate with
# estimate +/- 1.96 * SE), the p-value rounded to 3 decimals, and significance
# stars derived from the unrounded p-value (*** < 0.001, ** < 0.01, * < 0.05).
#   model    : fitted model whose summary()$coefficients has the columns
#              "Estimate", "Std. Error" and "Pr(>|t|)".
#   n_digits : number of decimals used for the estimate and its interval.
.format_coef_table <- function(model, n_digits = 0) {
  summary(model)$coefficients %>%
    data.frame(check.names = FALSE) %>%
    mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                           abs(`Pr(>|t|)`) < 0.01 ~ "**",
                           abs(`Pr(>|t|)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    # Strip the variable-name prefixes from the term labels.
    rownames_to_column(var = "x") %>%
    mutate(x = gsub("sample_type|treatment", "", x)) %>%
    mutate(x = gsub(":", " * ", x)) %>%
    column_to_rownames(var = "x") %>%
    rename("<i>p</i>-value" = "Pr(>|t|)",
           SE = "Std. Error") %>%
    mutate("Effect size (95% CI)" = paste0(round(Estimate, n_digits) %>% format(nsmall = n_digits),
                                           " (",
                                           round(Estimate - 1.96 * SE, n_digits) %>% format(nsmall = n_digits),
                                           ", ",
                                           round(Estimate + 1.96 * SE, n_digits) %>% format(nsmall = n_digits),
                                           ")"),
           "<i>p</i>-value" = round(`<i>p</i>-value`, 3)) %>%
    dplyr::select(c("Effect size (95% CI)", "<i>p</i>-value", " "))
}

# Stratified models of S.obs on treatment, one per sample type.
# The mock community is fitted with a plain linear model (no random effect);
# the other sample types use a random intercept per subject.
sr_lmer_mock <- lm(S.obs ~ treatment,
                   data = sample_data %>% data.frame %>% subset(., .$sample_type %in% c("Mock"))) %>%
  .format_coef_table()
sr_lmer_bal <- lmer(S.obs ~ treatment + (1|subject_id),
                    data = sample_data %>% data.frame %>% subset(., .$sample_type %in% c("BAL"))) %>%
  .format_coef_table()
sr_lmer_ns <- lmer(S.obs ~ treatment + (1|subject_id),
                   data = sample_data %>% data.frame %>% subset(., .$sample_type %in% c("Nasal"))) %>%
  .format_coef_table()
sr_lmer_spt <- lmer(S.obs ~ treatment + (1|subject_id),
                    data = sample_data %>% data.frame %>% subset(., .$sample_type %in% c("Sputum"))) %>%
  .format_coef_table()

# Side-by-side table: three columns per sample type under a grouping header.
tables8 <- cbind(sr_lmer_mock, sr_lmer_bal, sr_lmer_ns, sr_lmer_spt) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1, "Mock" = 3, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
tables8
| Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | |||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 407 ( 390, 425) | 0.000 | *** | 22 (-48, 91) | 0.554 | 137 (119, 155) | 0.000 | *** | 170 (127, 212) | 0.000 | *** | |
| lyPMA | -41 ( -67, -15) | 0.005 | ** | 59 (-16, 133) | 0.137 | 17 ( -8, 43) | 0.187 | 80 ( 38, 122) | 0.001 | ** | ||
| Benzonase | -85 (-112, -59) | 0.000 | *** | 126 ( 52, 201) | 0.003 | ** | 15 (-10, 40) | 0.252 | 86 ( 44, 128) | 0.001 | *** | |
| Host zero | -99 (-125, -73) | 0.000 | *** | 165 ( 90, 239) | 0.000 | *** | 59 ( 34, 84) | 0.000 | *** | 137 ( 95, 179) | 0.000 | *** |
| Molysis | -14 ( -40, 13) | 0.317 | 190 (116, 264) | 0.000 | *** | 45 ( 20, 70) | 0.002 | ** | 142 (100, 184) | 0.000 | *** | |
| QIAamp | -39 ( -66, -13) | 0.007 | ** | 130 ( 56, 204) | 0.003 | ** | 73 ( 48, 98) | 0.000 | *** | 116 ( 74, 158) | 0.000 | *** |
# Export Table S8 as a single self-contained HTML file.
save_kable(
  tables8,
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS8.html",
  self_contained = TRUE
)
Function beta - all samples
# Per-sample relative abundance of pathways (each value divided by the sample
# total), restricted to samples with S.obs != 0 from the four sample types
# analysed here.
phyloseq_rel_nz <- transform_sample_counts(phyloseq$phyloseq_path_rpk, function(x) {x/sum(x)}) %>%
  subset_samples(S.obs != 0 & sample_type %in% c("Mock", "BAL", "Nasal", "Sputum"))

# PERMANOVA (vegan::adonis2) with strata = subject_id and 10000 permutations.
# NOTE: the objects are named `bray_perm_*`, but the distance method is "horn".
# NOTE(review): no set.seed() call is visible next to these calls, so the
# permutation p-values may differ slightly between runs - confirm.

# All sample types, with a sample type x treatment interaction.
bray_perm_inter <- vegan::adonis2(
  distance(phyloseq_rel_nz, method = "horn") ~ sample_type * treatment + subject_id + log10(Final_reads),
  data = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = FALSE),
  strata = phyloseq_rel_nz %>% sample_data %>% data.frame(check.names = FALSE) %>% .$subject_id,
  permutations = 10000
)

# Stratified models: one per sample type, one term per treatment indicator.
bray_perm_ns <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"), method = "horn") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = subset_samples(phyloseq_rel_nz, sample_type == "Nasal") %>%
    sample_data %>% data.frame(check.names = FALSE),
  strata = subset_samples(phyloseq_rel_nz, sample_type == "Nasal") %>%
    sample_data %>% data.frame(check.names = FALSE) %>% .$subject_id,
  permutations = 10000
)
bray_perm_bal <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "BAL"), method = "horn") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>%
    sample_data %>% data.frame(check.names = FALSE),
  strata = subset_samples(phyloseq_rel_nz, sample_type == "BAL") %>%
    sample_data %>% data.frame(check.names = FALSE) %>% .$subject_id,
  permutations = 10000
)
bray_perm_spt <- vegan::adonis2(
  distance(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"), method = "horn") ~ lypma + benzonase + host_zero + molysis + qiaamp + log10(Final_reads),
  data = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>%
    sample_data %>% data.frame(check.names = FALSE),
  strata = subset_samples(phyloseq_rel_nz, sample_type == "Sputum") %>%
    sample_data %>% data.frame(check.names = FALSE) %>% .$subject_id,
  permutations = 10000
)

# Display table for the interaction model: readable term labels, stars from
# the unrounded p-values, numbers rounded to 3 decimals.
bray_perm_inter %>% data.frame(check.names = FALSE) %>% rownames_to_column("row.names") %>%
  mutate(row.names = case_when(row.names == "sample_type" ~ 'Sample type',
                               row.names == "treatment" ~ 'Treatment',
                               row.names == "subject_id" ~ 'Subject',
                               row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
                               row.names == "sample_type:treatment" ~ 'Sample type * Treatment',
                               row.names == "Residual" ~ 'Residual',
                               row.names == "Total" ~ 'Total')) %>%
  column_to_rownames('row.names') %>%
  mutate(` ` = case_when(abs(`Pr(>F)`) < 0.001 ~ "***",
                         abs(`Pr(>F)`) < 0.01 ~ "**",
                         abs(`Pr(>F)`) < 0.05 ~ "*",
                         .default = " ")) %>%
  # where() + lambda replaces the deprecated across(is.numeric, round, digits = 3).
  mutate(across(where(is.numeric), ~ round(.x, digits = 3)),
         `Pr(>F)` = format(`Pr(>F)`, nsmall = 3)) %>%
  rename("<i>p</i>-value" = "Pr(>F)",
         "R<sup>2</sup>" = "R2",
         "Degree of freedom" = "Df") %>%
  dplyr::select(c("Degree of freedom", "R<sup>2</sup>", "<i>p</i>-value", " ")) %>%
  kbl(format = "html", escape = FALSE) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
| Degree of freedom | R2 | p-value | ||
|---|---|---|---|---|
| Sample type | 3 | 0.342 | 0.000 | *** |
| Treatment | 5 | 0.065 | 0.000 | *** |
| Subject | 17 | 0.229 | 0.000 | *** |
| log10(Final reads) | 1 | 0.010 | 0.004 | ** |
| Sample type * Treatment | 15 | 0.162 | 0.000 | *** |
| Residual | 81 | 0.192 | NA | |
| Total | 122 | 1.000 | NA |
Function beta - stratified
Not included in the main text
Table. Degree of freedom, effect size (residual, R^2) and p-value of permutational ANOVA for functional Horn-Morisita distances, stratified by sample type, with a strata term (Horn-Morisita distance of functions ~ lyPMA + Benzonase + Host zero + Molysis + QIAamp + log10(final reads), strata = subject id).
# Format one stratified adonis2 result as a display table with readable term
# labels as row names and three columns: R2, p-value (both rounded to
# 3 decimals) and significance stars derived from the unrounded p-value.
#   fit : an adonis2 result with the columns "Df", "R2" and "Pr(>F)".
.format_permanova_table <- function(fit) {
  fit %>%
    data.frame(check.names = FALSE) %>%
    rownames_to_column('row.names') %>%
    mutate(row.names = case_when(row.names == "lypma" ~ 'lyPMA',
                                 row.names == "benzonase" ~ 'Benzonase',
                                 row.names == "host_zero" ~ 'Host zero',
                                 row.names == "molysis" ~ 'Molysis',
                                 row.names == "qiaamp" ~ 'QIAamp',
                                 row.names == "subject_id" ~ 'Subject id',
                                 row.names == "log10(Final_reads)" ~ 'log10(Final reads)',
                                 row.names == "Residual" ~ 'Residual',
                                 row.names == "Total" ~ 'Total')) %>%
    column_to_rownames('row.names') %>%
    mutate(` ` = case_when(abs(`Pr(>F)`) < 0.001 ~ "***",
                           abs(`Pr(>F)`) < 0.01 ~ "**",
                           abs(`Pr(>F)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    # where() + lambda replaces the deprecated across(is.numeric, round, digits = 3).
    mutate(across(where(is.numeric), ~ round(.x, digits = 3))) %>%
    rename("<i>p</i>-value" = "Pr(>F)",
           "R<sup>2</sup>" = "R2",
           "Degree of freedom" = "Df") %>%
    dplyr::select(c("R<sup>2</sup>", "<i>p</i>-value", " "))
}

# Names a / b / c are kept as in the original script; `c` here is a data
# frame, while calls such as c(...) below still resolve to base::c().
a <- .format_permanova_table(bray_perm_bal)
b <- .format_permanova_table(bray_perm_ns)
c <- .format_permanova_table(bray_perm_spt)

# Side-by-side table: three columns per sample type under a grouping header.
tables9 <- cbind(a, b, c) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
tables9
| R2 | p-value | R2 | p-value | R2 | p-value | ||||
|---|---|---|---|---|---|---|---|---|---|
| lyPMA | 0.047 | 0.098 | 0.091 | 0.009 | ** | 0.023 | 0.214 | ||
| Benzonase | 0.014 | 0.673 | 0.018 | 0.242 | -0.021 | 1.000 | |||
| Host zero | 0.036 | 0.157 | 0.014 | 0.592 | 0.065 | 0.032 | * | ||
| Molysis | 0.071 | 0.040 | * | 0.033 | 0.017 | * | 0.128 | 0.004 | ** |
| QIAamp | 0.119 | 0.010 | * | 0.047 | 0.103 | 0.356 | 0.000 | *** | |
| log10(Final reads) | 0.137 | 0.002 | ** | 0.126 | 0.003 | ** | 0.007 | 0.836 | |
| Residual | 0.576 | NA | 0.672 | NA | 0.442 | NA | |||
| Total | 1.000 | NA | 1.000 | NA | 1.000 | NA |
xii. What types of functions were affected by the treatment?
Function DA analysis
# Differential abundance (DA) analysis - MaAsLin
# Original notes: MaAsLin was run for all samples without decontam, with the
# model y ~ log(final reads) + sample_type + treatment; DA features can later
# be checked for overlap with potential contaminant taxa.
# This chunk only reads result tables from CSV files.

# All samples
f_maaslin_all <- read.csv(
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git/data/filt_f_maaslin_all.csv"
)

# One table per sample type (bal, spt, ns, pos - presumably BAL, sputum,
# nasal swab and positive/mock control, going by how they are used below).
f_fit_data_bal <- read.csv(
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git/data/filt_f_fit_data_bal.csv"
)
f_fit_data_spt <- read.csv(
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git/data/filt_f_fit_data_spt.csv"
)
f_fit_data_ns <- read.csv(
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git/data/filt_f_fit_data_ns.csv"
)
f_fit_data_pos <- read.csv(
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/5_Scripts/MGK/Host_depletion_git/data/filt_f_fit_data_pos.csv"
)
Again, most of the differentially abundant (DA) functions were sample-type specific.
# Build the q-value tables used by the balloon plots from one long-format
# result table.
#
# Arguments:
#   data        : data frame with the columns `feature`, `metadata` and
#                 `qval`; `metadata` must contain the five treatment terms
#                 lypma, benzonase, host_zero, molysis and qiaamp.
#   n_top       : number of features to keep, ranked by their smallest
#                 q-value across the five treatments (default 20).
#   q_threshold : q-value below which a cell is flagged with "*" (default 0.1).
#
# Returns a list with:
#   data        : wide table (feature + one q-value column per treatment),
#   data_italic : the same q-values with features as row names and display
#                 labels as column names,
#   data_sig    : same shape as data_italic, "*" where q < q_threshold, else NA.
make_sig_table <- function(data, n_top = 20, q_threshold = 0.1) {
  treatments <- c("lypma", "benzonase", "host_zero", "molysis", "qiaamp")

  # One row per feature, one q-value column per metadata term.
  sig_data <- spread(data[order(data$qval), c("feature", "metadata", "qval")], metadata, qval)
  # Dots in feature names are replaced by "-".
  sig_data$feature <- gsub("[.]", "-", sig_data$feature)

  # Smallest q-value per feature. As before, a feature with a missing q-value
  # for any treatment gets NA here and is therefore ranked last by order().
  # NOTE(review): confirm that ranking such features last is intended.
  sig_data$min <- do.call(pmin, unname(as.list(sig_data[treatments])))

  # head() instead of .[1:20, ]: with fewer than n_top features, index-based
  # subsetting pads the table with all-NA rows, which then break
  # column_to_rownames() below.
  sig_data <- sig_data[order(sig_data$min), ] %>%
    dplyr::select("feature", "lypma", "benzonase", "host_zero", "molysis", "qiaamp") %>%
    head(n_top)

  # Features become row names; treatment columns get their display labels.
  sig_data_italic <- sig_data %>%
    remove_rownames() %>%
    column_to_rownames(var = "feature") %>%
    rename(lyPMA = lypma, Benzonase = benzonase, `Host zero` = host_zero, Molysis = molysis, QIAamp = qiaamp)

  sig_data_sig <- ifelse(sig_data_italic < q_threshold, "*", NA) %>% data.frame(check.names = FALSE)

  list(data = sig_data, data_italic = sig_data_italic, data_sig = sig_data_sig)
}
# Replace each raw result table by the list returned by make_sig_table()
# (elements: data, data_italic, data_sig).
f_fit_data_pos <- make_sig_table(f_fit_data_pos)
f_fit_data_bal <- make_sig_table(f_fit_data_bal)
f_fit_data_ns <- make_sig_table(f_fit_data_ns)
f_fit_data_spt <- make_sig_table(f_fit_data_spt)
# For each sample type, add a `$rel` element holding per-treatment means of the
# selected features:
#   1. keep the samples of that type and the taxa listed in `$data$feature`;
#   2. bind the transposed OTU table to the sample metadata and average every
#      numeric column within each treatment;
#   3. keep the first 21 columns (treatment plus the next 20 columns -
#      presumably the feature columns; TODO confirm this still holds when fewer
#      than 20 features match);
#   4. transpose so that rows are features and columns are treatments, order
#      the rows like `$data_italic`, turn zeros into NA and move the row names
#      into a `feature` column.
# Mock community
f_pos_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Mock"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Mock")) %in% f_fit_data_pos$data$feature)
f_fit_data_pos$rel <- cbind(f_pos_sig %>% otu_table %>% t, f_pos_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% data.frame(check.names = F) %>%
.[row.names(f_fit_data_pos$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
# Sputum
f_spt_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Sputum"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Sputum")) %in% f_fit_data_spt$data$feature)
f_fit_data_spt$rel <- cbind(f_spt_sig %>% otu_table %>% t, f_spt_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% data.frame(check.names = F) %>%
.[row.names(f_fit_data_spt$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
# Nasal swab
f_ns_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "Nasal"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "Nasal")) %in% f_fit_data_ns$data$feature)
f_fit_data_ns$rel <- cbind(f_ns_sig %>% otu_table %>% t, f_ns_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% data.frame(check.names = F) %>%
.[row.names(f_fit_data_ns$data_italic),] %>% mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
# Nasal only: overwrite the feature column with the row names of `$data_sig`.
# NOTE(review): the other sample types do not get this step; presumably it
# restores names for features that had no match in the OTU table - confirm.
f_fit_data_ns$rel$feature <- row.names(f_fit_data_ns$data_sig)
# BAL
f_bal_sig <- subset_taxa(subset_samples(phyloseq_rel_nz, sample_type == "BAL"),
taxa_names(subset_samples(phyloseq_rel_nz, sample_type == "BAL")) %in% f_fit_data_bal$data$feature)
f_fit_data_bal$rel <- cbind(f_bal_sig %>% otu_table %>% t, f_bal_sig %>% sample_data) %>% group_by(treatment) %>% summarise_if(is.numeric, mean, na.rm = TRUE) %>% .[, 1:21] %>% column_to_rownames(., "treatment") %>% t () %>% data.frame(check.names = F) %>%
.[row.names(f_fit_data_bal$data_italic),] %>%
mutate_all(~na_if(., 0)) %>% rownames_to_column("feature")
MaAslin renaming
# Replace pathway identifiers by their `group` label in every element of a
# make_sig_table() result that also carries a `$rel` table.
#
# Argument:
#   list_maaslin_result : list with the elements `data`, `data_italic`,
#                         `data_sig` and `rel`.
# Returns the same list with features relabelled. The lookup comes from the
# taxonomy table of the global `phyloseq$phyloseq_path_cpm`, which must have
# the columns `pathway` and `group`.
# Note: merge() keeps only features present in the lookup and returns rows
# sorted by the merge key.
rename_function_gruop <- function(list_maaslin_result){
  # Lookup table: `feature` (the pathway identifier) alongside its `group`.
  taxa_df <- tax_table(phyloseq$phyloseq_path_cpm) %>%
    data.frame %>% remove_rownames() %>% rename(feature = "pathway")

  # Attach the lookup by `feature`, then drop the old identifier column.
  swap_in_group <- function(df) {
    merge(df, taxa_df, by = "feature") %>%
      dplyr::select(-c("feature"))
  }

  # Tables with a `feature` column: `group` becomes the new `feature` column.
  list_maaslin_result$data <-
    list_maaslin_result$data %>%
    swap_in_group() %>%
    rename(feature = "group") %>%
    dplyr::select(c("feature", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"))

  # Tables keyed by row names: `group` becomes the new row names.
  list_maaslin_result$data_italic <-
    list_maaslin_result$data_italic %>%
    rownames_to_column("feature") %>%
    swap_in_group() %>%
    column_to_rownames("group")

  list_maaslin_result$data_sig <-
    list_maaslin_result$data_sig %>%
    rownames_to_column("feature") %>%
    swap_in_group() %>%
    column_to_rownames("group")

  list_maaslin_result$rel <-
    list_maaslin_result$rel %>%
    swap_in_group() %>%
    rename(feature = "group") %>%
    dplyr::select(c("feature", "Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"))

  list_maaslin_result
}
# Relabel the features of every per-sample-type result list in place.
f_fit_data_pos <- f_fit_data_pos %>% rename_function_gruop()
f_fit_data_bal <- f_fit_data_bal %>% rename_function_gruop()
f_fit_data_ns <- f_fit_data_ns %>% rename_function_gruop()
f_fit_data_spt <- f_fit_data_spt %>% rename_function_gruop()
Fig. S10 MaAslin function - volcano plot
Fig. S10. Volcano plot of differential abundance of function by each treatment with a model MaAsLin (copies per million of each function ~ sample type + lyPMA + Benzonase + Host zero + Molysis + QIAamp, random effect = subject id).
# Volcano plot: -log10(q-value) against the MaAsLin coefficient, coloured by
# model term. The horizontal line at y = 1 corresponds to q = 0.1 and the
# vertical line marks a coefficient of 0.
# NOTE: the object is called figS9 but is written to FigureS10.png below.
figS9 <- ggplot(f_maaslin_all, aes(y = -log10(qval), x = coef, col = metadata)) +
  theme_classic(base_family = "sans") +
  #labs(tag = "A") +
  geom_point(size = 2, alpha = 0.5) +
  xlab("MaAslin coefficient") +
  ylab("-log<sub>10</sub>(*q*-value)") +
  geom_hline(yintercept = 1, col = "gray") +
  geom_vline(xintercept = 0, col = "gray") +
  annotate(family = "sans",
           geom = 'richtext',
           x = 0, y = 80,
           label = "<i>q</i>-value = 0.1, fold-change = 0") +
  theme(legend.position = "top", axis.title.y = ggtext::element_markdown()) +
  scale_color_manual(values = c("grey", "#f781bf", "#377eb8", "#ff7f00", "#ffff33", "#a65628"),
                     breaks = c("sample_type", "lypma", "benzonase", "host_zero", "molysis", "qiaamp"),
                     labels = c("Sample type", "lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp")) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  guides(col = guide_legend(title = "Factors", title.position = "top", nrow = 2))

# Write the figure to disk as a 180 mm x 90 mm PNG at 600 dpi.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS10.png", # output file
    width = 180, # plot width, in the unit given by `units` (mm)
    height = 90, # plot height, in the unit given by `units` (mm)
    units = "mm",
    res = 600
) #fixing multiple page issue
# print() explicitly: top-level values are not auto-printed when the script is
# run through source(), which would otherwise leave the device empty.
print(figS9)
dev.off()
## quartz_off_screen
## 2
print(figS9)
Fig. S11 Function balloon plot
Fig. S11. Mean copies per million of top 20 significant function identified by differential abundance analysis using MaAsLin. Analyses were stratified by sample type. (A) Mock community, (B) bronchoalveolar lavage, (C) nasal swabs, and (D) sputum. Statistical significances were noted at the level of q-value < 0.1.
# Balloon plot for the mock community (f_fit_data_pos): point size is the mean
# value per treatment multiplied by 1e6, fill flags q-value < 0.1.
# Long-format tables of means, q-values and significance markers are merged on
# feature x treatment; all = TRUE keeps the Untreated rows, which have no q-value.
f5a <- merge(f_fit_data_pos$rel %>%
               gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
             f_fit_data_pos$data_italic %>%
               rownames_to_column("feature") %>%
               gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
             by = c('feature', 'treatment'),
             all = TRUE) %>%
  merge(f_fit_data_pos$data_sig %>%
          rownames_to_column("feature") %>%
          gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
        by = c('feature', 'treatment'),
        all = TRUE) %>%
  # Flag from the numeric q-value. The previous `sig < 0.1` compared the "*"
  # marker string with 0.1 and only worked through string collation.
  mutate(sig = case_when(qval < 0.1 ~ "< 0.1",
                         .default = "> 0.1"),
         value = value * 1000000) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  xlab("Experimental group") +
  ylab("Function") +
  labs(tag = "A") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 7),
        axis.title.x = element_text(margin = margin(t = 20))) +
  # Named values pin each colour to its level even if only one level occurs;
  # red = q < 0.1, matching panels f5b-f5d (this panel had them swapped).
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top",
                             nrow = 2),
         size = guide_legend(title = "Copies per million",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  # NOTE(review): these keys are raw lower-case codes, while the x values here
  # are already display labels ("Untreated", "lyPMA", ...), so this relabelling
  # appears to have no effect - confirm before relying on "Host-zero".
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
#ffff33 qia
# Balloon plot for BAL (f_fit_data_bal): point size is the mean value per
# treatment multiplied by 1e6, fill flags q-value < 0.1.
# Long-format tables of means, q-values and significance markers are merged on
# feature x treatment; all = TRUE keeps the Untreated rows, which have no q-value.
f5b <- merge(f_fit_data_bal$rel %>%
               gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
             f_fit_data_bal$data_italic %>%
               rownames_to_column("feature") %>%
               gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
             by = c('feature', 'treatment'),
             all = TRUE) %>%
  merge(f_fit_data_bal$data_sig %>%
          rownames_to_column("feature") %>%
          gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
        by = c('feature', 'treatment'),
        all = TRUE) %>%
  # Flag from the numeric q-value. The previous `sig < 0.1` compared the "*"
  # marker string with 0.1 and only worked through string collation.
  mutate(sig = case_when(qval < 0.1 ~ "< 0.1",
                         .default = "> 0.1"),
         value = value * 1000000) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  ggtitle("A BAL") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 8),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        plot.margin = unit(c(0, 0.2, 0, 1), 'lines')) +
  # Named values pin each colour to its level even if only one level occurs.
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top",
                             nrow = 2,
                             override.aes = list(size = 3)),
         size = guide_legend(title = "Copies per million",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  # NOTE(review): these keys are raw lower-case codes, while the x values here
  # are already display labels ("Untreated", "lyPMA", ...), so this relabelling
  # appears to have no effect - confirm before relying on "Host-zero".
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
# Balloon plot for nasal swabs (f_fit_data_ns): point size is the mean value
# per treatment multiplied by 1e6, fill flags q-value < 0.1.
# Long-format tables of means, q-values and significance markers are merged on
# feature x treatment; all = TRUE keeps the Untreated rows, which have no q-value.
f5c <- merge(f_fit_data_ns$rel %>%
               gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
             f_fit_data_ns$data_italic %>%
               rownames_to_column("feature") %>%
               gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
             by = c('feature', 'treatment'),
             all = TRUE) %>%
  merge(f_fit_data_ns$data_sig %>%
          rownames_to_column("feature") %>%
          gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
        by = c('feature', 'treatment'),
        all = TRUE) %>%
  # Flag from the numeric q-value. The previous `sig < 0.1` compared the "*"
  # marker string with 0.1 and only worked through string collation.
  mutate(sig = case_when(qval < 0.1 ~ "< 0.1",
                         .default = "> 0.1"),
         value = value * 1000000) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  ggtitle("B Nasal swab") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 8),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        plot.margin = unit(c(0, 0.2, 0, 1), 'lines')) +
  # Named values pin each colour to its level even if only one level occurs.
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top",
                             override.aes = list(size = 3)),
         size = guide_legend(title = "Copies per million",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  # NOTE(review): these keys are raw lower-case codes, while the x values here
  # are already display labels ("Untreated", "lyPMA", ...), so this relabelling
  # appears to have no effect - confirm before relying on "Host-zero".
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
# Balloon plot for sputum (f_fit_data_spt): point size is the mean value per
# treatment multiplied by 1e6, fill flags q-value < 0.1.
# Long-format tables of means, q-values and significance markers are merged on
# feature x treatment; all = TRUE keeps the Untreated rows, which have no q-value.
f5d <- merge(f_fit_data_spt$rel %>%
               gather(treatment, value, Untreated:QIAamp, factor_key = TRUE),
             f_fit_data_spt$data_italic %>%
               rownames_to_column("feature") %>%
               gather(treatment, qval, lyPMA:QIAamp, factor_key = TRUE),
             by = c('feature', 'treatment'),
             all = TRUE) %>%
  merge(f_fit_data_spt$data_sig %>%
          rownames_to_column("feature") %>%
          gather(treatment, sig, lyPMA:QIAamp, factor_key = TRUE),
        by = c('feature', 'treatment'),
        all = TRUE) %>%
  # Flag from the numeric q-value. The previous `sig < 0.1` compared the "*"
  # marker string with 0.1 and only worked through string collation.
  mutate(sig = case_when(qval < 0.1 ~ "< 0.1",
                         .default = "> 0.1"),
         value = value * 1000000) %>%
  #Baloon plot
  ggballoonplot(size = "value", y = "feature", x = "treatment", fill = "sig") +
  theme_classic(base_family = "sans") +
  ggtitle("C Sputum") +
  theme(panel.grid.major = element_line(colour = "grey"),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        #Element markdown for taxa name italicizing
        axis.text.y = ggtext::element_markdown(size = 8),
        axis.title.y = element_blank(),
        axis.title.x = element_blank(),
        plot.margin = unit(c(0, 0.2, 0, 1), 'lines')) +
  # Named values pin each colour to its level even if only one level occurs.
  scale_fill_manual(values = c("< 0.1" = "red", "> 0.1" = "grey")) +
  guides(fill = guide_legend(title = c(expression(paste(italic("q"),
                                                        "-value",
                                                        sep = ""))),
                             title.position = "top",
                             override.aes = list(size = 5)),
         size = guide_legend(title = "Copies per million",
                             title.position = "top",
                             order = 1,
                             nrow = 1)) +
  # NOTE(review): these keys are raw lower-case codes, while the x values here
  # are already display labels ("Untreated", "lyPMA", ...), so this relabelling
  # appears to have no effect - confirm before relying on "Host-zero".
  scale_x_discrete(labels = c("control" = "Untreated",
                              "lypma" = "lyPMA",
                              "benzonase" = "Benzonase",
                              "host_zero" = "Host-zero",
                              "molysis" = "Molysis",
                              "qiaamp" = "QIAamp"))
# Combined balloon-plot figure: the legend extracted from f5d on top, followed
# by the BAL, nasal swab and sputum panels. The panels' own legends are hidden.
# NOTE: the object is called figS10 but is written to FigureS11.png below.
figS10 <- ggarrange(f5d %>% lemon::g_legend() %>% as_ggplot,
                    f5b,
                    f5c,
                    f5d,
                    ncol = 1, heights = c(1.5, 4, 4, 4),
                    legend = "none",
                    align = "hv")

# Shared axis titles are added once and reused for display and export.
figS10_annotated <- annotate_figure(figS10,
                                    left = text_grob("Predicted function",
                                                     rot = 90,
                                                     family = "sans",
                                                     size = 11),
                                    bottom = text_grob("Treatment",
                                                       rot = 0,
                                                       family = "sans",
                                                       size = 11, hjust = -3))
# print() explicitly: top-level values are not auto-printed when the script is
# run through source(), which would otherwise leave the device below empty.
print(figS10_annotated)

# Write the figure to disk as a 240 mm x 220 mm PNG at 600 dpi.
png(file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS11.png", # output file
    width = 240, # plot width, in the unit given by `units` (mm)
    height = 220, # plot height, in the unit given by `units` (mm)
    units = "mm",
    res = 600
) #fixing multiple page issue
print(figS10_annotated)
dev.off()
## quartz_off_screen
## 2
3.5. Sensitivity analysis after decontamination
xiii. Is species richness similar after decontamination?
Sensitivity analysis
Fig. S12. Rarefaction curve of species richness
Fig. S12. Rarefaction curve for (A) species richness and (B) function richness stratified by sample type, after removing possible contaminant taxa identified by decontam and low-prevalence taxa.
As a sanity check, rarefaction curves were generated and seemed to be saturated
# Panel A of Fig. S12: observed species richness (S.obs) against sequencing
# depth (Final_reads, in millions) for BAL, nasal and sputum samples with a
# known treatment, coloured by treatment and faceted by sample type.
fig_rarefraction <- phyloseq$phyloseq_count %>%
  sample_data() %>%
  data.frame() %>%
  subset(!is.na(treatment) & sample_type %in% c("BAL", "Nasal", "Sputum")) %>%
  ggplot(aes(x = Final_reads / 1000000, y = S.obs, col = treatment)) +
  geom_point() +
  theme_classic(base_family = "sans") +
  # The <sup> tag is rendered by element_markdown() in theme() below.
  xlab("Final reads x 10<sup>6</sup>") +
  ylab("Species richness") +
  labs(tag = "A") +
  theme(axis.title.x = element_markdown(), legend.position = "top") +
  guides(col = guide_legend(title = "Treatment", title.position = "top",
                            nrow = 1)) +
  # Labels are applied by position, so they assume the treatment levels are
  # ordered Untreated ... QIAamp. The last label previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) +
  facet_wrap(~sample_type, scales = "free", nrow = 1)
# Panel B of Fig. S12: observed function richness (S.obs of the pathway
# table) against sequencing depth (Final_reads, in millions) for BAL, nasal
# and sputum samples with a known treatment.
fig_rarefraction_function <- phyloseq$phyloseq_path_rpk %>%
  sample_data() %>%
  data.frame() %>%
  subset(!is.na(treatment) & sample_type %in% c("BAL", "Nasal", "Sputum")) %>%
  ggplot(aes(x = Final_reads / 1000000, y = S.obs, col = treatment)) +
  geom_point() +
  theme_classic(base_family = "sans") +
  # The <sup> tag is rendered by element_markdown() in theme() below.
  xlab("Final reads x 10<sup>6</sup>") +
  ylab("Function richness") +
  labs(tag = "B") +
  theme(axis.title.x = element_markdown(), legend.position = "top") +
  guides(col = guide_legend(title = "Treatment", title.position = "top",
                            nrow = 1)) +
  # Labels are applied by position, so they assume the treatment levels are
  # ordered Untreated ... QIAamp. The last label previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) +
  facet_wrap(~sample_type, scales = "free", nrow = 1)
# Show the two rarefaction panels stacked in one column with a shared legend.
ggarrange(
  fig_rarefraction,
  fig_rarefraction_function,
  common.legend = TRUE,
  ncol = 1
)

# Export the same layout to FigureS12.png: 180 mm x 180 mm (units = "mm") at
# res = 600. The explicit device was introduced to address a multiple-page
# issue (per the original note).
png(
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS12.png",
  width = 180,
  height = 180,
  units = "mm",
  res = 600
)
ggarrange(
  fig_rarefraction,
  fig_rarefraction_function,
  common.legend = TRUE,
  ncol = 1
)
dev.off()
## quartz_off_screen
## 2
Fig. S13. Species richness of decontaminated output
As some of the species richness got way higher, this data cannot be used for alpha diversity indices.
Fig. S13. Species richness of (A) raw data after prevalence and abundance filtering, (B) decontaminated species richness with decontam37, and (C) decontaminated data using tinyvamp.
# Panel A of Fig. S13: species richness (S.obs) per treatment for the
# prevalence- and abundance-filtered data. Points are individual samples; the
# point-range shows mean +/- 1 SD (mean_sdl with mult = 1).
f10a <- sample_data(phyloseq$phyloseq_count) %>%
  data.frame() %>%
  subset(sample_type %in% c("Sputum", "Nasal", "BAL", "Mock")) %>%
  ggplot(aes(x = treatment, y = S.obs)) +
  geom_jitter(aes(color = treatment), position = position_jitter(0.2),
              size = 1.2) +
  stat_summary(aes(color = treatment),
               fun.data = "mean_sdl", fun.args = list(mult = 1),
               geom = "pointrange", size = 0.4) +
  ylab("Species richness") +
  xlab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  # Labels are applied by position to the treatment levels. The last label
  # previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) +
  labs(tag = "A") +
  ggtitle("Prevalence & abundance filtered data") +
  # Axis titles are blanked here; shared titles are added when the panels are
  # combined with annotate_figure().
  theme(plot.tag = element_text(size = 15),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "top") +
  facet_wrap(~sample_type, nrow = 1) +
  guides(col = guide_legend(nrow = 1))
# Panel B of Fig. S13: species richness (S.obs) per treatment for the
# decontam-cleaned data ("Decontaminated data 1"). Points are individual
# samples; the point-range shows mean +/- 1 SD (mean_sdl with mult = 1).
#
# This panel previously read phyloseq$phyloseq_count, i.e. exactly the data of
# panel A, so A and B were identical. It now uses the decontam object, the same
# source used for "Decontaminated data 1" in Table S10.
# NOTE(review): confirm phyloseq_decontam$phyloseq_rel is the intended slot
# (S.obs is read from its sample_data, as in the Table S10 models).
f10b <- sample_data(phyloseq_decontam$phyloseq_rel) %>%
  data.frame() %>%
  subset(sample_type %in% c("Sputum", "Nasal", "BAL", "Mock")) %>%
  ggplot(aes(x = treatment, y = S.obs)) +
  geom_jitter(aes(color = treatment), position = position_jitter(0.2),
              size = 1.2) +
  stat_summary(aes(color = treatment),
               fun.data = "mean_sdl", fun.args = list(mult = 1),
               geom = "pointrange", size = 0.4) +
  ylab("Species richness") +
  xlab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  # Labels are applied by position to the treatment levels. The last label
  # previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) +
  labs(tag = "B") +
  ggtitle("Decontaminated data 1") +
  # Axis titles are blanked here; shared titles are added when the panels are
  # combined with annotate_figure().
  theme(plot.tag = element_text(size = 15),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "top") +
  facet_wrap(~sample_type, nrow = 1) +
  guides(col = guide_legend(nrow = 1))
# Panel C of Fig. S13: species richness (S.obs) per treatment for the
# phyloseq_tv object ("Decontaminated data 2"). Points are individual samples;
# the point-range shows mean +/- 1 SD (mean_sdl with mult = 1).
f10c <- sample_data(phyloseq_tv) %>%
  data.frame() %>%
  subset(sample_type %in% c("Sputum", "Nasal", "BAL", "Mock")) %>%
  ggplot(aes(x = treatment, y = S.obs)) +
  geom_jitter(aes(color = treatment), position = position_jitter(0.2),
              size = 1.2) +
  stat_summary(aes(color = treatment),
               fun.data = "mean_sdl", fun.args = list(mult = 1),
               geom = "pointrange", size = 0.4) +
  ylab("Species richness") +
  xlab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  # Labels are applied by position to the treatment levels. The last label
  # previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) +
  labs(tag = "C") +
  ggtitle("Decontaminated data 2") +
  # Axis titles are blanked here; shared titles are added when the panels are
  # combined with annotate_figure().
  theme(plot.tag = element_text(size = 15),
        axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.ticks.x = element_blank(),
        legend.position = "top") +
  facet_wrap(~sample_type, nrow = 1) +
  guides(col = guide_legend(nrow = 1))
# Stack the three species-richness panels in one column with a shared legend.
figa1 <- ggarrange(
  f10a,
  f10b,
  f10c,
  common.legend = TRUE,
  ncol = 1
)

# Display the combined figure with shared axis titles.
annotate_figure(
  figa1,
  left = text_grob("Species richness", rot = 90, family = "sans", size = 11),
  bottom = text_grob("Treatment", rot = 0, family = "sans", size = 11)
)

# Export the same annotated figure to FigureS13.png: 180 mm x 180 mm
# (units = "mm") at res = 600. The explicit device was introduced to address a
# multiple-page issue (per the original note).
png(
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/FigureS13.png",
  width = 180,
  height = 180,
  units = "mm",
  res = 600
)
annotate_figure(
  figa1,
  left = text_grob("Species richness", rot = 90, family = "sans", size = 11),
  bottom = text_grob("Treatment", rot = 0, family = "sans", size = 11)
)
dev.off()
## quartz_off_screen
## 2
Table S10. Species richness change after decontamination
Table S10. Effect size (95% confidence interval) and p-value of decontaminated species richness using decontam37 (decontaminated data 1) and tinyvamp (decontaminated data 2). The change was tested using the model lmer(species richness ~ treatment + (1|subject id)). Statistical significance is denoted by \*: p-value < 0.05, \*\*: p-value < 0.01, and \*\*\*: p-value < 0.001.
# Fit `S.obs ~ treatment + (1 | subject_id)` on one sample type of a phyloseq
# object and format the fixed-effect table for Table S10.
#
# physeq:       phyloseq object whose sample_data provides S.obs, treatment,
#               subject_id and sample_type.
# sample_group: value of sample_type to analyse (e.g. "BAL").
#
# Returns a data frame with one row per fixed effect (row names are the
# coefficient names with the "treatment"/"sample_type" prefixes stripped) and
# three columns: "Effect size (95% CI)", "<i>p</i>-value" and " "
# (significance stars).
#
# The six tables below were previously six copies of this pipeline. The two
# Nasal copies called format(nsmall = ) with an empty argument, unlike the BAL
# and Sputum copies; all six now use nsmall = 1.
format_richness_lmer <- function(physeq, sample_group) {
  meta <- data.frame(sample_data(physeq))
  # which() drops rows with a missing sample_type, as subset() did.
  meta <- meta[which(meta$sample_type == sample_group), , drop = FALSE]

  fit <- lmer(S.obs ~ treatment + (1 | subject_id), data = meta)

  # The `Pr(>|t|)` column is only present when lmer() is the lmerTest version.
  summary(fit)$coefficients %>%
    data.frame(check.names = FALSE) %>%
    mutate(` ` = case_when(abs(`Pr(>|t|)`) < 0.001 ~ "***",
                           abs(`Pr(>|t|)`) < 0.01 ~ "**",
                           abs(`Pr(>|t|)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    rownames_to_column(var = "x") %>%
    mutate(x = gsub("treatment|sample_type", "", x)) %>%
    mutate(x = gsub(":", " * ", x)) %>%
    # Kept from the original copies; this model has no such covariate, so the
    # substitution is a no-op here.
    mutate(x = gsub("(bal|ns|spt)_log_centered_final_reads",
                    "log<sub>10</sub>(Final reads)", x)) %>%
    column_to_rownames(var = "x") %>%
    rename("<i>p</i>-value" = "Pr(>|t|)",
           SE = "Std. Error") %>%
    # Wald-type interval: estimate +/- 1.96 * SE, shown to one decimal place.
    mutate("Effect size (95% CI)" = paste0(
             format(round(Estimate, 1), nsmall = 1),
             " (",
             format(round(Estimate - 1.96 * SE, 1), nsmall = 1),
             ", ",
             format(round(Estimate + 1.96 * SE, 1), nsmall = 1),
             ")"),
           "<i>p</i>-value" = round(`<i>p</i>-value`, 3)) %>%
    dplyr::select(c("Effect size (95% CI)", "<i>p</i>-value", " "))
}

# Decontaminated data 1 (decontam).
sr_lmer_bal_decontam <- format_richness_lmer(phyloseq_decontam$phyloseq_rel,
                                             "BAL")
sr_lmer_ns_decontam <- format_richness_lmer(phyloseq_decontam$phyloseq_rel,
                                            "Nasal")
sr_lmer_spt_decontam <- format_richness_lmer(phyloseq_decontam$phyloseq_rel,
                                             "Sputum")

# Decontaminated data 2 (phyloseq_tv).
sr_lmer_bal <- format_richness_lmer(phyloseq_tv, "BAL")
sr_lmer_ns <- format_richness_lmer(phyloseq_tv, "Nasal")
sr_lmer_spt <- format_richness_lmer(phyloseq_tv, "Sputum")
# Table S10: the three decontam tables side by side, followed by the three
# phyloseq_tv tables (row names dropped on the second set so the first set's
# coefficient names label the rows), rendered as an unescaped HTML table with
# two grouped header rows.
tableA1 <- cbind(
  cbind(sr_lmer_bal_decontam, sr_lmer_ns_decontam, sr_lmer_spt_decontam),
  remove_rownames(cbind(sr_lmer_bal, sr_lmer_ns, sr_lmer_spt))
) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1,
                     "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3,
                     "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  add_header_above(c(" " = 1,
                     "Decontaminated data 1" = 9,
                     "Decontaminated data 2" = 9)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")

tableA1
| Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | Effect size (95% CI) | p-value | |||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| (Intercept) | 3.0 (-7.1, 13.1) | 0.572 | 8.3 ( 5.7, 10.9) | 0.000 | *** | 14.4 (-4.5, 33.3) | 0.171 | 3.6 (-5.2, 12.4) | 0.436 | 6.3 ( 4.9, 7.7) | 0.000 | *** | 8.0 (-2.0, 18.0) | 0.152 | ||||
| lyPMA | 0.6 (-9.5, 10.7) | 0.909 | -5.5 (-9.9, -1.1) | 0.022 |
|
32.0 (14.9, 49.1) | 0.002 | ** | 1.0 (-8.3, 10.3) | 0.835 | -1.5 (-3.9, 0.9) | 0.235 | 14.8 ( 5.5, 24.1) | 0.005 | ** | |||
| Benzonase | 5.2 (-4.9, 15.3) | 0.327 | -0.7 (-5.1, 3.8) | 0.774 | 59.0 (41.9, 76.1) | 0.000 | *** | 4.0 (-4.9, 12.9) | 0.392 | 0.1 (-2.3, 2.5) | 0.937 | 26.2 (16.9, 35.5) | 0.000 | *** | ||||
| Host zero | 7.4 (-2.7, 17.5) | 0.168 | 8.3 ( 3.9, 12.8) | 0.001 | ** | 92.0 (74.9, 109.1) | 0.000 | *** | 6.0 (-2.9, 14.9) | 0.204 | 6.3 ( 3.9, 8.7) | 0.000 | *** | 45.2 (35.9, 54.5) | 0.000 | *** | ||
| Molysis | 16.4 ( 6.3, 26.5) | 0.005 | ** | 5.1 ( 0.7, 9.5) | 0.032 |
|
100.8 (83.7, 117.9) | 0.000 | *** | 12.0 ( 3.1, 20.9) | 0.017 |
|
3.1 ( 0.7, 5.5) | 0.019 |
|
51.8 (42.5, 61.1) | 0.000 | *** |
| QIAamp | 8.2 (-1.9, 18.3) | 0.129 | 6.5 ( 2.0, 10.9) | 0.008 | ** | 76.0 (58.9, 93.1) | 0.000 | *** | 6.2 (-2.7, 15.1) | 0.190 | 6.3 ( 3.9, 8.7) | 0.000 | *** | 35.2 (25.9, 44.5) | 0.000 | *** |
# Write Table S10 to a self-contained HTML file.
save_kable(
  tableA1,
  file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/tableS10.html",
  self_contained = TRUE
)
Beta diversity distances of decontaminated output
As the increase in species richness is due to slight increases in the relative abundance of some taxa (e.g., from 0 to 0.0001), beta-diversity indices can be used for assessing the changes after host depletion.
# Builds, for the filtered (non-decontaminated) relative-abundance data, a long
# table of Morisita-Horn distances between each treated aliquot and the
# untreated ("control") aliquot of the same sample, plus zero-distance
# reference rows for the untreated aliquots themselves. The result is stored in
# path_horn_dist_long_within_sampleid_from_control.
#Making subset of non-zero samples without neg
# Keep samples with S.obs != 0 from the four sample types of interest.
phyloseq_rel_nz <- phyloseq$phyloseq_rel %>%
subset_samples(S.obs != 0 & sample_type %in% c("Mock", "BAL", "Nasal", "Sputum"))
#distances of betadiversity - boxplots
# melt_dist() yields one row per sample pair; the code below relies on its
# iso1, iso2 and dist columns (presumably harrietr::melt_dist - confirm).
horn_dist_long <- distance(phyloseq_rel_nz, method="horn") %>% as.matrix() %>% melt_dist() #making long data of distance matrices
#Adding sample type and treatment name.
#this can be also done by merging metadata into the `horn_dist_long`
# Split each sample name on "_" into at most three fields; the first two
# fields together are used as the sample identifier.
names <- data.frame(str_split_fixed(horn_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(horn_dist_long$iso2, "_", 3))
horn_dist_long$sample_id_1 <- paste(names$X1, names$X2, sep = "_")
# Treatment is inferred from a substring of the sample name; names matching
# none of the patterns are treated as the untreated control.
horn_dist_long$method_1 <- ifelse(grepl("lyPMA", horn_dist_long$iso1),"lypma",
ifelse(grepl("ben", horn_dist_long$iso1),"benzonase",
ifelse(grepl("host", horn_dist_long$iso1),"host_zero",
ifelse(grepl("qia", horn_dist_long$iso1),"qiaamp",
ifelse(grepl("moly", horn_dist_long$iso1),"molysis",
"control")))))
#Adding data for iso 2 also should be done
horn_dist_long$sample_id_2 <- paste(names2$X1, names2$X2, sep = "_")
horn_dist_long$method_2 <-ifelse(grepl("lyPMA", horn_dist_long$iso2),"lypma",
ifelse(grepl("ben", horn_dist_long$iso2),"benzonase",
ifelse(grepl("host", horn_dist_long$iso2),"host_zero",
ifelse(grepl("qia", horn_dist_long$iso2),"qiaamp",
ifelse(grepl("moly", horn_dist_long$iso2),"molysis",
"control")))))
#subsetting distances of my interest
# Collapse all positive controls to "Mock" and all negative controls to "Neg."
# so that they compare as the same "sample" below.
# NOTE(review): with ignore.case the "n_" alternative matches any identifier
# whose first field ends in "n"/"N" - confirm no subject ids are affected.
horn_dist_long$sample_id_1 <- ifelse(grepl("pos", horn_dist_long$sample_id_1, ignore.case = T),"Mock",
ifelse(grepl("neg|n_", horn_dist_long$sample_id_1, ignore.case = T),"Neg.",
horn_dist_long$sample_id_1))
horn_dist_long$sample_id_2 <- ifelse(grepl("pos", horn_dist_long$sample_id_2, ignore.case = T),"Mock",
ifelse(grepl("neg|n_", horn_dist_long$sample_id_2, ignore.case = T),"Neg.",
horn_dist_long$sample_id_2))
# Keep pairs from the same sample ...
path_horn_dist_long_within_sampleid_from_control <- subset(horn_dist_long, horn_dist_long$sample_id_1 == horn_dist_long$sample_id_2) # data within samples
# ... processed with two different methods ...
path_horn_dist_long_within_sampleid_from_control <- subset(path_horn_dist_long_within_sampleid_from_control,
path_horn_dist_long_within_sampleid_from_control$method_1 != path_horn_dist_long_within_sampleid_from_control$method_2) # remove irrelevant association
# ... where at least one member of the pair is the untreated control.
path_horn_dist_long_within_sampleid_from_control <- subset(path_horn_dist_long_within_sampleid_from_control, (path_horn_dist_long_within_sampleid_from_control$method_1 == "control") + (path_horn_dist_long_within_sampleid_from_control$method_2 == "control") != 0)
# treatment = the non-control method of the pair.
path_horn_dist_long_within_sampleid_from_control$treatment <- path_horn_dist_long_within_sampleid_from_control$method_1
path_horn_dist_long_within_sampleid_from_control$treatment <- ifelse(path_horn_dist_long_within_sampleid_from_control$treatment == "control", path_horn_dist_long_within_sampleid_from_control$method_2, path_horn_dist_long_within_sampleid_from_control$treatment)
#Setting key method
# Sample type is inferred from the name of iso1; names matching no pattern
# get NA. The last pattern ("neg|N_EXT") is case-sensitive.
path_horn_dist_long_within_sampleid_from_control$sample_type <- ifelse(grepl("NS", path_horn_dist_long_within_sampleid_from_control$iso1), "Nasal",
ifelse(grepl("CFB", path_horn_dist_long_within_sampleid_from_control$iso1), "Sputum",
ifelse(grepl("BAL", path_horn_dist_long_within_sampleid_from_control$iso1), "BAL",
ifelse(grepl("pos|POS", path_horn_dist_long_within_sampleid_from_control$iso1, ignore.case = T), "Mock",
ifelse(grepl("neg|N_EXT", path_horn_dist_long_within_sampleid_from_control$iso1), "Neg.",NA)))))
#Making a column for baseline (controls, from where?)
# dist_from = name of the control aliquot in each pair.
path_horn_dist_long_within_sampleid_from_control <- path_horn_dist_long_within_sampleid_from_control %>%
mutate(dist_from = case_when(method_1 == "control" ~ iso1,
method_2 == "control" ~ iso2))
# One zero-distance row per control aliquot, labelled "Untreated", so the
# untreated group appears as a reference in the summary figure.
dummy <- data.frame(iso1 = path_horn_dist_long_within_sampleid_from_control$dist_from %>% unique,
iso2 = path_horn_dist_long_within_sampleid_from_control$dist_from %>% unique,
dist = 0,
treatment = "Untreated",
method_1 = "control",
method_2 = "control"
)
# Derive sample ids and sample type for the reference rows with the same
# rules as above.
names <- data.frame(str_split_fixed(dummy$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(dummy$iso2, "_", 3))
dummy$sample_id_1 <- paste(names$X1, names$X2, sep = "_")
#Adding data for iso 2 also should be done
dummy$sample_id_2 <- paste(names2$X1, names2$X2, sep = "_")
#subsetting distances of my interest
dummy$sample_id_1 <- ifelse(grepl("pos", dummy$sample_id_1, ignore.case = T),"Mock",
ifelse(grepl("neg|n_", dummy$sample_id_1, ignore.case = T),"Neg.",
dummy$sample_id_1))
dummy$sample_id_2 <- ifelse(grepl("pos", dummy$sample_id_2, ignore.case = T),"Mock",
ifelse(grepl("neg|n_", dummy$sample_id_2, ignore.case = T),"Neg.",
dummy$sample_id_2))
dummy$sample_type <- ifelse(grepl("NS", dummy$iso1), "Nasal",
ifelse(grepl("CFB", dummy$iso1), "Sputum",
ifelse(grepl("BAL", dummy$iso1), "BAL",
ifelse(grepl("pos|POS", dummy$iso1, ignore.case = T), "Mock",
ifelse(grepl("neg|N_EXT", dummy$iso1), "Neg.",NA)))))
# Drop reference rows whose sample type could not be determined.
dummy <- subset(dummy, !is.na(dummy$sample_type))
# Append the reference rows to the treated-vs-control distances.
path_horn_dist_long_within_sampleid_from_control <- bind_rows(path_horn_dist_long_within_sampleid_from_control, dummy)
#Making figure of beta diversity distances
# Panel A: mean Morisita-Horn distance from the untreated aliquot (with a
# t-based 95% CI) per treatment and sample type, for the raw composition.
fd1 <- path_horn_dist_long_within_sampleid_from_control %>%
  # Rows whose sample type is not one of these levels (e.g. "Neg.") become NA
  # and are removed by the filter below.
  mutate(sample_type = factor(sample_type,
                              levels = c("Mock", "BAL", "Nasal", "Sputum"))) %>%
  dplyr::filter(!is.na(sample_type)) %>%
  group_by(sample_type, treatment) %>%
  summarise(mean = mean(dist, na.rm = TRUE),
            sd = sd(dist, na.rm = TRUE),
            n = n(),
            .groups = "drop") %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         # The last display label previously read "QIAaamp".
         treatment = factor(treatment,
                            levels = c("Untreated", "lypma", "benzonase",
                                       "host_zero", "molysis", "qiaamp"),
                            labels = c("Untreated", "lyPMA", "Benzonase",
                                       "Host zero", "Molysis", "QIAamp"))) %>%#,
  #text = paste(sprintf("%.2f", round(mean, digits = 2)), " [", sprintf("%.2f", round(lower.ci, digits = 2)), ", ", sprintf("%.2f", round(upper.ci, digits = 2)), "]", sep = "")) %>%
  ggplot(aes(x = mean, y = treatment, col = treatment)) +
  geom_point(aes(x = mean), shape = 15, size = 3) +
  geom_linerange(aes(xmin = lower.ci, xmax = upper.ci)) +
  facet_wrap(~sample_type, nrow = 4) +
  # Reverse the y axis so "Untreated" is drawn at the top.
  scale_y_discrete(limits = rev) +
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  xlab("Distance from untreated") +
  ylab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  theme(plot.tag = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "none") +
  labs(tag = "A") +
  ggtitle("M-H dist for raw composition") +
  geom_vline(xintercept = 0, col = "black", linetype = "dotted") +
  #coord_cartesian(xlim=c(-0.5, 1)) +
  #geom_text(aes(x = 0, label = treatment), hjust = 0, nudge_x = -.55, size = 3, color = "black", family = "sans") +
  #geom_text(aes(x = 0, label = text), hjust = 0, nudge_x = -0.4, size = 3, color = "black", family = "sans") +
  scale_x_continuous(breaks = c(-0.5, 0, 0.5, 1, 1.5),
                     labels = c(-0.5, "0 (low bias)", 0.5, 1,
                                "1.5 (high bias)"))
# Same analysis as panel A, for the phyloseq_tv ("decontaminated") data:
# Morisita-Horn distance of each treated aliquot from the untreated aliquot of
# the same sample, summarised per treatment and sample type (panel B).
#
# Fix: method_1 used to be coded "host zero" while method_2 was coded
# "host_zero", and the factor levels only contained "host zero". Host-zero
# pairs were therefore split into two groups and one of them became NA. The
# code "host_zero" is now used throughout.

#Making subset of non-zero samples without neg
phyloseq_rel_nz_tv <- phyloseq_tv %>%
  subset_samples(S.obs != 0 & sample_type %in% c("Mock", "BAL", "Nasal", "Sputum"))

#distances of betadiversity - boxplots
# Long table of pairwise distances; the code below relies on the iso1, iso2
# and dist columns produced by melt_dist().
horn_dist_long <- distance(phyloseq_rel_nz_tv, method = "horn") %>%
  as.matrix() %>%
  melt_dist()

#Adding sample type and treatment name.
#this can be also done by merging metadata into the `horn_dist_long`
# Sample id = first two "_"-separated fields of the sample name.
names <- data.frame(str_split_fixed(horn_dist_long$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(horn_dist_long$iso2, "_", 3))

# Treatment is inferred from a (case-sensitive) substring of the sample name;
# names matching none of the patterns are treated as the untreated control.
horn_dist_long$sample_id_1 <- paste(names$X1, names$X2, sep = "_")
horn_dist_long$method_1 <- case_when(
  grepl("lyPMA", horn_dist_long$iso1) ~ "lypma",
  grepl("Ben", horn_dist_long$iso1) ~ "benzonase",
  grepl("Host", horn_dist_long$iso1) ~ "host_zero",
  grepl("QIA", horn_dist_long$iso1) ~ "qiaamp",
  grepl("Moly", horn_dist_long$iso1) ~ "molysis",
  .default = "control"
)
horn_dist_long$sample_id_2 <- paste(names2$X1, names2$X2, sep = "_")
horn_dist_long$method_2 <- case_when(
  grepl("lyPMA", horn_dist_long$iso2) ~ "lypma",
  grepl("Ben", horn_dist_long$iso2) ~ "benzonase",
  grepl("Host", horn_dist_long$iso2) ~ "host_zero",
  grepl("QIA", horn_dist_long$iso2) ~ "qiaamp",
  grepl("Moly", horn_dist_long$iso2) ~ "molysis",
  .default = "control"
)

#subsetting distances of my interest
# Collapse positive controls to "Mock" and negative controls to "Neg.".
horn_dist_long$sample_id_1 <- ifelse(
  grepl("pos", horn_dist_long$sample_id_1, ignore.case = TRUE), "Mock",
  ifelse(grepl("neg|n_", horn_dist_long$sample_id_1, ignore.case = TRUE),
         "Neg.",
         horn_dist_long$sample_id_1))
horn_dist_long$sample_id_2 <- ifelse(
  grepl("pos", horn_dist_long$sample_id_2, ignore.case = TRUE), "Mock",
  ifelse(grepl("neg|n_", horn_dist_long$sample_id_2, ignore.case = TRUE),
         "Neg.",
         horn_dist_long$sample_id_2))

# Keep pairs from the same sample, processed with two different methods, one
# of which is the untreated control.
path_horn_dist_long_within_sampleid_from_control <- subset(
  horn_dist_long,
  sample_id_1 == sample_id_2 &
    method_1 != method_2 &
    (method_1 == "control" | method_2 == "control")
)

# treatment = the non-control method of the pair.
path_horn_dist_long_within_sampleid_from_control$treatment <- ifelse(
  path_horn_dist_long_within_sampleid_from_control$method_1 == "control",
  path_horn_dist_long_within_sampleid_from_control$method_2,
  path_horn_dist_long_within_sampleid_from_control$method_1
)

#Setting key method
# Sample type is inferred from the name of iso1; unmatched names get NA.
path_horn_dist_long_within_sampleid_from_control$sample_type <- ifelse(grepl("NS", path_horn_dist_long_within_sampleid_from_control$iso1), "Nasal",
  ifelse(grepl("CFB", path_horn_dist_long_within_sampleid_from_control$iso1), "Sputum",
    ifelse(grepl("BAL", path_horn_dist_long_within_sampleid_from_control$iso1), "BAL",
      ifelse(grepl("pos|POS", path_horn_dist_long_within_sampleid_from_control$iso1, ignore.case = TRUE), "Mock",
        ifelse(grepl("neg|N_EXT", path_horn_dist_long_within_sampleid_from_control$iso1), "Neg.", NA)))))

#Making a column for baseline (controls, from where?)
path_horn_dist_long_within_sampleid_from_control <- path_horn_dist_long_within_sampleid_from_control %>%
  mutate(dist_from = case_when(method_1 == "control" ~ iso1,
                               method_2 == "control" ~ iso2))

# One zero-distance row per control aliquot, labelled "Untreated", so the
# untreated group appears as a reference in the figure.
dummy <- data.frame(
  iso1 = unique(path_horn_dist_long_within_sampleid_from_control$dist_from),
  iso2 = unique(path_horn_dist_long_within_sampleid_from_control$dist_from),
  dist = 0,
  treatment = "Untreated",
  method_1 = "control",
  method_2 = "control"
)
names <- data.frame(str_split_fixed(dummy$iso1, "_", 3))
names2 <- data.frame(str_split_fixed(dummy$iso2, "_", 3))
dummy$sample_id_1 <- paste(names$X1, names$X2, sep = "_")
#Adding data for iso 2 also should be done
dummy$sample_id_2 <- paste(names2$X1, names2$X2, sep = "_")
#subsetting distances of my interest
dummy$sample_type <- ifelse(grepl("NS", dummy$iso1), "Nasal",
  ifelse(grepl("CFB", dummy$iso1), "Sputum",
    ifelse(grepl("BAL", dummy$iso1), "BAL",
      ifelse(grepl("pos|POS", dummy$iso1, ignore.case = TRUE), "Mock",
        ifelse(grepl("neg|N_EXT", dummy$iso1), "Neg.", NA)))))
path_horn_dist_long_within_sampleid_from_control <- bind_rows(
  path_horn_dist_long_within_sampleid_from_control,
  dummy
)

#Making figure of beta diversity distances
fd2 <- path_horn_dist_long_within_sampleid_from_control %>%
  # Rows whose sample type is not one of these levels (Mock, "Neg.",
  # unmatched) become NA and are removed by the filter below.
  mutate(sample_type = factor(sample_type,
                              levels = c("BAL", "Nasal", "Sputum"))) %>%
  dplyr::filter(!is.na(sample_type)) %>%
  group_by(sample_type, treatment) %>%
  summarise(mean = mean(dist, na.rm = TRUE),
            sd = sd(dist, na.rm = TRUE),
            n = n(),
            .groups = "drop") %>%
  mutate(se = sd / sqrt(n),
         lower.ci = mean - qt(1 - (0.05 / 2), n - 1) * se,
         upper.ci = mean + qt(1 - (0.05 / 2), n - 1) * se,
         # The last display label previously read "QIAaamp".
         treatment = factor(treatment,
                            levels = c("Untreated", "lypma", "benzonase",
                                       "host_zero", "molysis", "qiaamp"),
                            labels = c("Untreated", "lyPMA", "Benzonase",
                                       "Host zero", "Molysis", "QIAamp"))) %>%#,
  #text = paste(sprintf("%.2f", round(mean, digits = 2)), " [", sprintf("%.2f", round(lower.ci, digits = 2)), ", ", sprintf("%.2f", round(upper.ci, digits = 2)), "]", sep = "")) %>%
  ggplot(aes(x = mean, y = treatment, col = treatment)) +
  geom_point(aes(x = mean), shape = 15, size = 3) +
  geom_linerange(aes(xmin = lower.ci, xmax = upper.ci)) +
  facet_wrap(~sample_type, nrow = 4) +
  # Reverse the y axis so "Untreated" is drawn at the top.
  scale_y_discrete(limits = rev) +
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  xlab("Distance from untreated") +
  ylab("Treatment group") +
  theme_classic(base_size = 12, base_family = "sans") +
  theme(plot.tag = element_text(size = 15),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        legend.position = "none") +
  labs(tag = "B") +
  ggtitle("M-H dist for decontaminated composition") +
  geom_vline(xintercept = 0, col = "black", linetype = "dotted") +
  #coord_cartesian(xlim=c(-0.5, 1)) +
  #geom_text(aes(x = 0, label = treatment), hjust = 0, nudge_x = -.55, size = 3, color = "black", family = "sans") +
  #geom_text(aes(x = 0, label = text), hjust = 0, nudge_x = -0.4, size = 3, color = "black", family = "sans") +
  scale_x_continuous(breaks = c(-0.5, 0, 0.5, 1, 1.5),
                     labels = c(-0.5, "0 (low bias)", 0.5, 1,
                                "1.5 (high bias)"))
# Stack the raw (fd1) and decontaminated (fd2) distance panels in one column
# with a shared legend, then display the result.
figa2 <- ggarrange(
  fd1,
  fd2,
  ncol = 1,
  common.legend = TRUE,
  align = "hv"
)

figa2
Beta diversity plot
# Panel A: PCoA on Morisita-Horn distances for the prevalence- and
# abundance-filtered data (mock and negative controls excluded from the
# ordination), coloured by treatment and faceted by sample type.
figd2a <- ordinate(
  subset_samples(phyloseq_rel_nz,
                 sample_type != "Neg." & sample_type != "Mock"),
  method = "PCoA",
  distance = "horn"
) %>%
  # `color` is spelled out; the previous `col =` relied on partial matching.
  plot_ordination(phyloseq_rel_nz, ., color = "treatment") +
  #scale_color_viridis(discrete = 6, name = "Treatment", labels = c("Mock theoretical", "Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) +
  # The legend label for the "QIAamp" break previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    breaks = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp"),
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  #scale_shape(name = "Sample type", labels = c("Mock theoretical", "Mock")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "sans") +
  facet_wrap(~sample_type, scales = "free") +
  labs(tag = "A") +
  ggtitle("Prevalence & abundance filtered data") +
  theme(plot.tag = element_text(size = 15), legend.position = "top")# +
#stat_ellipse(type = "norm") +
#stat_ellipse(type = "t")
# Panel B: PCoA on Morisita-Horn distances for the decontam-cleaned data
# (mock/negative controls and samples with S.obs == 0 excluded from the
# ordination), coloured by treatment and faceted by sample type.
figd2b <- ordinate(
  subset_samples(phyloseq_decontam$phyloseq_rel,
                 sample_type != "Neg." & sample_type != "Mock" &
                   S.obs != 0),
  method = "PCoA",
  distance = "horn"
) %>%
  # `color` is spelled out; the previous `col =` relied on partial matching.
  plot_ordination(phyloseq_decontam$phyloseq_rel, ., color = "treatment") +
  #scale_color_viridis(discrete = 6, name = "Treatment", labels = c("Mock theoretical", "Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) +
  # The legend label for the "QIAamp" break previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    breaks = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp"),
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  #scale_shape(name = "Sample type", labels = c("Mock theoretical", "Mock")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "sans") +
  facet_wrap(~sample_type, scales = "free") +
  labs(tag = "B") +
  ggtitle("Decontaminated data 1") +
  theme(plot.tag = element_text(size = 15), legend.position = "top")# +
#stat_ellipse(type = "norm") +
#stat_ellipse(type = "t")
# Panel C: PCoA on Morisita-Horn distances for the phyloseq_tv data (mock and
# negative controls excluded from the ordination), coloured by treatment and
# faceted by sample type.
# NOTE(review): unlike panel B, samples with S.obs == 0 are not excluded here
# - confirm phyloseq_tv has none, or ordinate on phyloseq_rel_nz_tv instead.
figd2c <- ordinate(
  subset_samples(phyloseq_tv,
                 sample_type != "Neg." & sample_type != "Mock"),
  method = "PCoA",
  distance = "horn"
) %>%
  # `color` is spelled out; the previous `col =` relied on partial matching.
  plot_ordination(phyloseq_tv, ., color = "treatment") +
  #scale_color_viridis(discrete = 6, name = "Treatment", labels = c("Mock theoretical", "Control","lyPMA", "Benzonase", "Host zero", "Molysis", "QIAaamp")) +
  # The legend label for the "QIAamp" break previously read "QIAaamp".
  scale_color_manual(
    values = c("#e31a1c", "#fb9a99", "#33a02c", "#b2df8a", "#1f78b4",
               "#a6cee3"),
    name = "Treatment",
    breaks = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp"),
    labels = c("Untreated", "lyPMA", "Benzonase", "Host zero", "Molysis",
               "QIAamp")
  ) + #color using https://colorbrewer2.org/#type=qualitative&scheme=Set1&n=6
  #scale_shape(name = "Sample type", labels = c("Mock theoretical", "Mock")) +
  geom_point(size = 3) +
  theme_classic(base_size = 12, base_family = "sans") +
  facet_wrap(~sample_type, scales = "free") +
  labs(tag = "C") +
  ggtitle("Decontaminated data 2") +
  theme(plot.tag = element_text(size = 15), legend.position = "top")# +
#stat_ellipse(type = "norm") +
#stat_ellipse(type = "t")
# Stack the three ordination panels vertically under one shared legend,
# then display the combined figure.
figA3 <- ggarrange(
  plotlist = list(figd2a, figd2b, figd2c),
  nrow = 3,
  common.legend = TRUE
)
figA3
PERMANOVA - for sensitivity analysis
Effect size of Tinyvamp-decontamination after adjusting for treatment, sequencing depth, etc.
Table. Degrees of freedom, effect size (residual, R^2) and p-value of permutational ANOVA for Morisita-Horn distances for species richness stratified by sample type (MH-distance ~ lyPMA + Benzonase + Host zero + Molysis + QIAamp + log10(Final_reads) + decontaminated, strata = subject_id).
# Turn one PERMANOVA result table into the three display columns used in
# Table A2: R^2, p-value and a significance-star column.
#
# permanova: a table with model terms as row names and at least the columns
#            `R2` and `Pr(>F)` (as read by the code below).
# Returns a data.frame with relabelled row names and the columns
# "R<sup>2</sup>", "<i>p</i>-value" and " " (stars).
format_permanova_table <- function(permanova) {
  # Display labels for the model terms; terms not listed here become NA,
  # exactly as with the previous case_when() mapping.
  term_labels <- c(
    "lypma" = "lyPMA",
    "benzonase" = "Benzonase",
    "host_zero" = "Host zero",
    "molysis" = "Molysis",
    "qiaamp" = "QIAamp",
    "subject_id" = "Subject id",
    "log10(Final_reads)" = "log10(Final reads)",
    "decontaminated" = "Decontaminated", # was misspelled "Decomtaminated"
    "Residual" = "Residual",
    "Total" = "Total"
  )

  permanova %>%
    data.frame(check.names = FALSE) %>%
    rownames_to_column("row.names") %>%
    mutate(row.names = unname(term_labels[row.names])) %>%
    column_to_rownames("row.names") %>%
    # Stars are derived from the unrounded p-values, so rounding below cannot
    # move a value across a threshold.
    mutate(` ` = case_when(abs(`Pr(>F)`) < 0.001 ~ "***",
                           abs(`Pr(>F)`) < 0.01 ~ "**",
                           abs(`Pr(>F)`) < 0.05 ~ "*",
                           .default = " ")) %>%
    # where() + lambda replaces the deprecated across(is.numeric, round, ...).
    mutate(across(where(is.numeric), \(x) round(x, digits = 3))) %>%
    dplyr::rename("<i>p</i>-value" = "Pr(>F)",
                  "R<sup>2</sup>" = "R2",
                  "Degree of freedom" = "Df") %>%
    dplyr::select(c("R<sup>2</sup>", "<i>p</i>-value", " "))
}

# Per-sample-type panels. The names a / b / c are kept so that any later code
# relying on them still works (note that `c` shadows base::c as a variable).
a <- format_permanova_table(horn_perm_bal)
b <- format_permanova_table(horn_perm_ns)
c <- format_permanova_table(horn_perm_spt)

# Side-by-side HTML table; escape = FALSE keeps the HTML in the column names.
TableA2 <- cbind(a, b, c) %>%
  kbl(format = "html", escape = FALSE) %>%
  add_header_above(c(" " = 1, "BAL" = 3, "Nasal swab" = 3, "Sputum" = 3)) %>%
  kable_styling(full_width = FALSE, html_font = "sans")
#Table A2
List of contaminants (Tinyvamp)
# Stratified by sample type
# Show the two prevalence tables that are merged below.
prev_neg
prev_all

# Mark every sample whose sample_type contains "Neg".
sample_data(phyloseq_unfiltered$phyloseq_rel)$is.neg <- grepl(
  "Neg",
  sample_data(phyloseq_unfiltered$phyloseq_rel)$sample_type
)

# Taxa present in the unfiltered count data but absent from phyloseq_tv.
unfiltered_taxa <- taxa_names(phyloseq_unfiltered$phyloseq_count)
contaminants_tv <- data.frame(
  Taxa = unfiltered_taxa[!(unfiltered_taxa %in% taxa_names(phyloseq_tv))]
)

# Attach both prevalence columns, sort by descending prevalence (all samples
# first, negative controls second) and drop taxa with zero overall prevalence.
merged_contaminants <- contaminants_tv %>%
  merge(rownames_to_column(prev_all, "Taxa"), by = "Taxa") %>%
  merge(rownames_to_column(prev_neg, "Taxa"), by = "Taxa") %>%
  dplyr::select(c("Taxa", "Prevalence (all)", "Prevalence (negative controls)")) %>%
  dplyr::arrange(
    dplyr::desc(`Prevalence (all)`),
    dplyr::desc(`Prevalence (negative controls)`)
  ) %>%
  remove_rownames() %>%
  subset(`Prevalence (all)` != 0)

# HTML rendering; escape = FALSE keeps the markup produced by species_italic2().
tableA3 <- kable_styling(
  kbl(
    mutate(merged_contaminants, Taxa = species_italic2(Taxa)),
    format = "html",
    escape = FALSE
  ),
  full_width = FALSE,
  html_font = "sans"
)
tableA3
| Taxa | Prevalence (all) | Prevalence (negative controls) |
|---|---|---|
| Gemella haemolysans | 69 | 1 |
| Staphylococcus argenteus | 65 | 2 |
| Staphylococcus epidermidis | 60 | 1 |
| Pseudomonas aeruginosa | 53 | 0 |
| Staphylococcus schweitzeri | 52 | 2 |
| Streptococcus mitis | 41 | 1 |
| Streptococcus oralis | 38 | 0 |
| Listeria floridensis | 34 | 1 |
| Corynebacterium atypicum | 33 | 0 |
| Streptococcus anginosus | 33 | 0 |
| Pseudomonas formosensis | 32 | 25 |
| Actinomyces odontolyticus | 32 | 0 |
| Slackia isoflavoniconvertens | 31 | 0 |
| Cryptococcus gattii VGI | 30 | 1 |
| Cryptococcus gattii VGII | 30 | 1 |
| Cryptococcus gattii VGIII | 30 | 1 |
| Saccharomyces kudriavzevii | 30 | 1 |
| Collinsella intestinalis | 30 | 0 |
| Bacillus ginsengihumi | 29 | 1 |
| S. cerevisiae x S. kudriavzevii | 29 | 1 |
| Actinomyces oris | 29 | 0 |
| Gemella morbillorum | 29 | 0 |
| Streptococcus salivarius | 29 | 0 |
| Actinomyces sp. HPA0247 | 28 | 0 |
| Actinomyces sp. oral taxon 181 | 28 | 0 |
| Rothia dentocariosa | 28 | 0 |
| Listeria innocua | 27 | 1 |
| Prevotella melaninogenica | 27 | 0 |
| Streptococcus infantis | 27 | 0 |
| Actinomyces sp. HMSC035G02 | 26 | 0 |
| Actinomyces sp. S6 Spd3 | 26 | 0 |
| Atopobium rimae | 26 | 0 |
| Olsenella scatoligenes | 26 | 0 |
| Cutibacterium granulosum | 25 | 0 |
| Streptococcus australis | 25 | 0 |
| Streptococcus gordonii | 25 | 0 |
| Streptococcus sanguinis | 25 | 0 |
| Actinomyces sp. ICM47 | 24 | 0 |
| Gemella bergeri | 24 | 0 |
| Veillonella dispar | 24 | 0 |
| Actinomyces naeslundii | 23 | 0 |
| Streptococcus sp. F0442 | 23 | 0 |
| Granulicatella adiacens | 22 | 0 |
| Collinsella stercoris | 21 | 0 |
| Streptococcus peroris | 21 | 0 |
| Actinomyces sp. oral taxon 180 | 20 | 0 |
| Eubacterium infirmum | 20 | 0 |
| Streptococcus sp. A12 | 20 | 0 |
| Streptococcus vestibularis | 20 | 0 |
| Veillonella atypica | 20 | 0 |
| Propionibacterium namnetense | 19 | 1 |
| Actinomyces viscosus | 19 | 0 |
| Streptococcus pseudopneumoniae | 19 | 0 |
| Streptococcus sp. HPH0090 | 19 | 0 |
| Corynebacterium durum | 18 | 0 |
| Eubacterium brachy | 18 | 0 |
| Propionibacterium humerusii | 18 | 0 |
| Streptococcus pneumoniae | 18 | 0 |
| Abiotrophia defectiva | 17 | 0 |
| Enorma massiliensis | 17 | 0 |
| Parvimonas sp. oral taxon 393 | 17 | 0 |
| Porphyromonas somerae | 17 | 0 |
| Prevotella histicola | 17 | 0 |
| Rothia aeria | 17 | 0 |
| Veillonella infantium | 17 | 0 |
| Actinomyces johnsonii | 16 | 0 |
| Actinomyces meyeri | 16 | 0 |
| Actinomyces sp. oral taxon 170 | 16 | 0 |
| Mogibacterium pumilum | 16 | 0 |
| Olsenella profusa | 16 | 0 |
| Actinomyces massiliensis | 15 | 0 |
| Corynebacterium pseudodiphtheriticum | 15 | 0 |
| Corynebacterium pseudogenitalium | 15 | 0 |
| Neisseria subflava | 15 | 0 |
| Prevotella salivae | 15 | 0 |
| Streptococcus mutans | 15 | 0 |
| Streptococcus sp. HMSC034E03 | 15 | 0 |
| Streptococcus sp. M334 | 15 | 0 |
| Candida parapsilosis | 14 | 0 |
| Gemella asaccharolytica | 14 | 0 |
| Listeria marthii | 14 | 0 |
| Streptococcus cristatus | 14 | 0 |
| Veillonella sp. T11011 6 | 14 | 0 |
| Parvimonas sp. oral taxon 110 | 13 | 0 |
| Prevotella jejuni | 13 | 0 |
| Streptococcus sp. HMSC067H01 | 13 | 0 |
| Streptococcus sp. HMSC071D03 | 13 | 0 |
| Candida dubliniensis | 12 | 0 |
| Mogibacterium timidum | 12 | 0 |
| Oribacterium sp. oral taxon 078 | 12 | 0 |
| Bifidobacterium dentium | 11 | 0 |
| Streptococcus milleri | 11 | 0 |
| Actinomyces georgiae | 10 | 0 |
| Actinomyces hongkongensis | 10 | 0 |
| Cardiobacterium valvarum | 10 | 0 |
| Prevotella pallens | 10 | 0 |
| Stenotrophomonas rhizophila | 10 | 0 |
| Tannerella sp. oral taxon HOT 286 | 10 | 0 |
| Atopobium deltae | 9 | 0 |
| Corynebacterium matruchotii | 9 | 0 |
| Lactobacillus rhamnosus | 9 | 0 |
| Oribacterium asaccharolyticum | 9 | 0 |
| Prevotella oris | 9 | 0 |
| Prevotella sp. oral taxon 306 | 9 | 0 |
| Stenotrophomonas pavanii | 9 | 0 |
| Microbacterium ginsengisoli | 8 | 6 |
| Pseudomonas putida | 8 | 1 |
| Actinomyces sp. oral taxon 414 | 8 | 0 |
| Actinomyces sp. oral taxon 448 | 8 | 0 |
| Actinomyces sp. oral taxon 897 | 8 | 0 |
| Eubacterium nodatum | 8 | 0 |
| Oribacterium parvum | 8 | 0 |
| Pseudomonas fluorescens | 8 | 0 |
| Corynebacterium tuberculostearicum | 7 | 0 |
| Lactobacillus gasseri | 7 | 0 |
| Staphylococcus haemolyticus | 7 | 0 |
| Streptococcus sobrinus | 7 | 0 |
| Streptococcus thermophilus | 6 | 2 |
| Achromobacter ruhlandii | 6 | 0 |
| Candida orthopsilosis | 6 | 0 |
| Porphyromonas catoniae | 6 | 0 |
| Staphylococcus capitis | 6 | 0 |
| Alloprevotella rava | 5 | 0 |
| Bifidobacterium breve | 5 | 0 |
| Corynebacterium aurimucosum | 5 | 0 |
| Cutibacterium avidum | 5 | 0 |
| Leptotrichia sp. oral taxon 215 | 5 | 0 |
| Prevotella buccae | 5 | 0 |
| Streptococcus sp. oral taxon 056 | 5 | 0 |
| Tannerella forsythia | 5 | 0 |
| Achromobacter denitrificans | 4 | 0 |
| Capnocytophaga leadbetteri | 4 | 0 |
| Eubacterium saphenum | 4 | 0 |
| Leptotrichia sp. oral taxon 212 | 4 | 0 |
| Neisseria elongata | 4 | 0 |
| Peptostreptococcus sp. MV1 | 4 | 0 |
| Streptococcus viridans | 4 | 0 |
| Corynebacterium coyleae | 3 | 1 |
| Malassezia globosa | 3 | 1 |
| Actinomyces cardiffensis | 3 | 0 |
| Actinomyces radingae | 3 | 0 |
| Enterococcus avium | 3 | 0 |
| Neisseria sicca | 3 | 0 |
| Porphyromonas asaccharolytica | 3 | 0 |
| Scardovia inopinata | 3 | 0 |
| Streptococcus pyogenes | 3 | 0 |
| Streptococcus sp. SK643 | 3 | 0 |
| Acetobacter senegalensis | 2 | 2 |
| Staphylococcus hominis | 2 | 2 |
| Actinomyces denticolens | 2 | 0 |
| Anaerococcus octavius | 2 | 0 |
| Atopobium minutum | 2 | 0 |
| Corynebacterium afermentans | 2 | 0 |
| Corynebacterium kroppenstedtii | 2 | 0 |
| Corynebacterium pyruviciproducens | 2 | 0 |
| Lactobacillus paragasseri | 2 | 0 |
| Lactobacillus reuteri | 2 | 0 |
| Lactobacillus salivarius | 2 | 0 |
| Mycobacterium intracellulare | 2 | 0 |
| Prevotella nigrescens | 2 | 0 |
| Prevotella sp. F0091 | 2 | 0 |
| Streptococcus sp. NLAE zl C503 | 2 | 0 |
| Staphylococcus cohnii | 1 | 1 |
| Staphylococcus pettenkoferi | 1 | 1 |
| Acinetobacter johnsonii | 1 | 0 |
| Actinomyces turicensis | 1 | 0 |
| Aspergillus eucalypticola | 1 | 0 |
| Aspergillus kawachii | 1 | 0 |
| Aspergillus lacticoffeatus | 1 | 0 |
| Aspergillus niger | 1 | 0 |
| Aspergillus phoenicis | 1 | 0 |
| Aspergillus sydowii | 1 | 0 |
| Aspergillus thermomutatus | 1 | 0 |
| Aspergillus tubingensis | 1 | 0 |
| Aspergillus turcosus | 1 | 0 |
| Aspergillus vadensis | 1 | 0 |
| Aspergillus welwitschiae | 1 | 0 |
| Bacillus cereus | 1 | 0 |
| Campylobacter gracilis | 1 | 0 |
| Campylobacter mucosalis | 1 | 0 |
| Campylobacter showae | 1 | 0 |
| Candida tropicalis | 1 | 0 |
| Capnocytophaga granulosa | 1 | 0 |
| Capnocytophaga sputigena | 1 | 0 |
| Corynebacterium sp. NML140438 | 1 | 0 |
| Dialister micraerophilus | 1 | 0 |
| Eubacterium rectale | 1 | 0 |
| Fusobacterium periodonticum | 1 | 0 |
| Fusobacterium sp. oral taxon 370 | 1 | 0 |
| Haemophilus sp. HMSC71H05 | 1 | 0 |
| Klebsiella michiganensis | 1 | 0 |
| Klebsiella oxytoca | 1 | 0 |
| Lactobacillus oris | 1 | 0 |
| Leptotrichia buccalis | 1 | 0 |
| Leptotrichia hofstadii | 1 | 0 |
| Leptotrichia sp. oral taxon 225 | 1 | 0 |
| Leptotrichia sp. oral taxon 498 | 1 | 0 |
| Leptotrichia sp. oral taxon 879 | 1 | 0 |
| Micrococcus aloeverae | 1 | 0 |
| Moraxella catarrhalis | 1 | 0 |
| Mycolicibacterium fortuitum | 1 | 0 |
| Neisseria macacae | 1 | 0 |
| Neisseria mucosa | 1 | 0 |
| Peptoniphilus harei | 1 | 0 |
| Porphyromonas canoris | 1 | 0 |
| Porphyromonas uenonis | 1 | 0 |
| Prevotella denticola | 1 | 0 |
| Prevotella intermedia | 1 | 0 |
| Prevotella oulorum | 1 | 0 |
| Prevotella pleuritidis | 1 | 0 |
| Prevotella scopos | 1 | 0 |
| Pseudoglutamicibacter cumminsii | 1 | 0 |
| Rickettsia typhi | 1 | 0 |
| Selenomonas flueggei | 1 | 0 |
| Selenomonas noxia | 1 | 0 |
| Selenomonas sp. oral taxon 892 | 1 | 0 |
| Selenomonas sp. oral taxon 920 | 1 | 0 |
| Serratia liquefaciens | 1 | 0 |
| Streptococcus downei | 1 | 0 |
| Streptococcus massiliensis | 1 | 0 |
| Streptococcus salivarius CAG 79 | 1 | 0 |
| Streptococcus sinensis | 1 | 0 |
| Streptococcus sp. DD11 | 1 | 0 |
| Streptococcus sp. HMSC070B10 | 1 | 0 |
| Veillonella tobetsuensis | 1 | 0 |
3.6. Summary
Mean of species richness change by sample
# Per-sample species richness next to the richness of the untreated sample
# from the same subject.
# - reframe() replaces the multi-row summarise(), which dplyr has deprecated.
# - S.obs_untreated was previously just a copy of S.obs; it now holds the
#   S.obs of the subject's "Untreated" sample (NA when the subject has none).
# NOTE(review): assumes each subject_id has at most one "Untreated" sample;
# if there are several, the first one is used - confirm against the data.
phyloseq$phyloseq_count %>%
  sample_data() %>%
  data.frame() %>%
  group_by(subject_id) %>%
  reframe(
    treatment = treatment,
    S.obs = S.obs,
    S.obs_untreated = S.obs[which(treatment == "Untreated")[1]]
  )
Table 3. Sequencing summary
Table 3. Summary table of sequencing issues, significant effects in the linear mixed-effect model (species richness ~ treatment + (1|subject)), changes in microbial beta diversity, and significant effects in the linear mixed-effect model (function richness ~ treatment + (1|subject)). Linear mixed-effect models were stratified by sample type and employed data after prevalence and abundance filtering.
Molysis for BAL, QIAamp for nasal swab, and Host zero for sputum.
# BAL panel of Table 3: hard-coded character entries, one row per treatment
# and one column per summary category.
summary_bal <- data.frame(
  Issues = c("High host%", "High host%", "-", "-", "High host%"),
  `Species richness` = c("-", "-", "-", "+18.2", "-"),
  `Microbial beta diversity` = rep("-", 5),
  `Function richness` = c("-", "+126", "+165", "+190", "+130"),
  row.names = c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
  check.names = FALSE, # keep the column names with spaces as written
  stringsAsFactors = FALSE
)
# Nasal-swab panel of Table 3: hard-coded character entries, one row per
# treatment and one column per summary category.
# NOTE(review): the function-richness entries "59", "45" and "73" carry no
# "+" sign, unlike the other panels - confirm whether that is intended.
summary_ns <- data.frame(
  Issues = c("Gram-stain %", "High host%", "Library fail", "Library fail", "-"),
  `Species richness` = c("-", "-", "+9.6", "+5.7", "+7.6"),
  `Microbial beta diversity` = c("Changed", "Changed", "-", "Changed", "-"),
  `Function richness` = c("-", "-", "59", "45", "73"),
  row.names = c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
  check.names = FALSE, # keep the column names with spaces as written
  stringsAsFactors = FALSE
)
# Sputum panel of Table 3: hard-coded character entries, one row per
# treatment and one column per summary category.
summary_spt <- data.frame(
  Issues = rep("Gram-stain %", 5),
  `Species richness` = c("+37.0", "+65.8", "+101.6", "+111.0", "+84.2"),
  `Microbial beta diversity` = c("-", "Changed", "Changed", "Changed", "Changed"),
  `Function richness` = c("+80", "+86", "+137", "+142", "+116"),
  row.names = c("lyPMA", "Benzonase", "Host zero", "Molysis", "QIAamp"),
  check.names = FALSE, # keep the column names with spaces as written
  stringsAsFactors = FALSE
)
# Join the three sample-type panels column-wise and render them as one HTML
# table with a grouping header row (first header cell spans the row names).
table3 <- kable_styling(
  add_header_above(
    kbl(
      cbind(summary_bal, summary_ns, summary_spt),
      format = "html",
      escape = FALSE
    ),
    c(" " = 1, "BAL" = 4, "Nasal swab" = 4, "Sputum" = 4)
  ),
  full_width = FALSE,
  html_font = "sans"
)
table3
| | Issues | Species richness | Microbial beta diversity | Function richness | Issues | Species richness | Microbial beta diversity | Function richness | Issues | Species richness | Microbial beta diversity | Function richness |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| lyPMA | High host% | - | - | - | Gram-stain % | - | Changed | - | Gram-stain % | +37.0 | - | +80 |
| Benzonase | High host% | - | - | +126 | High host% | - | Changed | - | Gram-stain % | +65.8 | Changed | +86 |
| Host zero | - | - | - | +165 | Library fail | +9.6 | - | 59 | Gram-stain % | +101.6 | Changed | +137 |
| Molysis | - | +18.2 | - | +190 | Library fail | +5.7 | Changed | 45 | Gram-stain % | +111.0 | Changed | +142 |
| QIAamp | High host% | - | - | +130 | - | +7.6 | - | 73 | Gram-stain % | +84.2 | Changed | +116 |
# Write Table 3 to disk as a single self-contained HTML file.
# NOTE(review): the destination is an absolute, machine-specific path.
table3 %>%
  save_kable(
    file = "/Users/minsikkim/Dropbox (Partners HealthCare)/Project_SICAS2_microbiome/7_Manuscripts/2022_MGK_Host_Depletion/Figures/table3.html",
    self_contained = TRUE
  )
Done.
Bibliography
#===============================================================================
#BTC.LineZero.Footer.1.1.0
#===============================================================================
#R markdown citation generator.
#===============================================================================
#RLB.Dependencies:
# magrittr, pacman, stringr
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#BTC.Dependencies:
# LineZero.Header
#===============================================================================
#Generates citations for each explicitly loaded library.
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Print one line of citation text per loaded library, preceded by R itself.
#
# Changes from the previous version:
# - The loop variable no longer reuses the name `str_libraries`, so the
#   library vector is not overwritten by its last element.
# - unique() keeps packages that appear twice in `str_libraries` from being
#   cited twice.
# - The references are taken from format(style = "text") instead of dropping
#   a fixed number of lines from the printed citation, which cut off the
#   beginning of each reference.
# - Every emphasis underscore is removed, not just the first one per line.
citation_targets <- unique(c("r", str_libraries))
for (str_library in citation_targets) {
  # "r" / "R" stands for R itself, which is cited through the base package.
  pkg_name <- if (str_library %in% c("r", "R")) "base" else str_library
  utils::citation(package = pkg_name) |>
    format(style = "text") |>
    stringr::str_squish() |>
    # Strip "_" used as emphasis markup (at a word edge) but leave underscores
    # that sit between alphanumerics, e.g. inside URLs.
    stringr::str_replace_all("(?<![[:alnum:]])_|_(?![[:alnum:]])", "") |>
    cat()
  cat("\n")
}
## Computing. R Foundation for Statistical Computing, Vienna, Austria. <https://www.R-project.org/>. We have invested a lot of time and effort in creating R, please cite it when using it for data analysis. See also 'citation("pkgname")' for citing R packages.
## version 1.4.3, <https://CRAN.R-project.org/package=readxl>.
## graphics of microbiome census data. Paul J. McMurdie and Susan Holmes (2013) PLoS ONE 8(4):e61217.
## Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E, Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the tidyverse." Journal of Open Source Software_, *4*(43), 1686. doi:10.21105/joss.01686 <https://doi.org/10.21105/joss.01686>.
## R. version 0.5.0. Buffalo, New York. http://github.com/trinker/pacman
## J, reikoch, Beasley W, O'Connor B, Warnes GR, Quinn M, Kamvar ZN (2023). yaml: Methods to Convert R Data to YAML and Back_. R package version 2.3.7, <https://CRAN.R-project.org/package=yaml>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## Springer-Verlag New York, 2016.
## O'Hara R, Solymos P, Stevens M, Szoecs E, Wagner H, Barbour M, Bedward M, Bolker B, Borcard D, Carvalho G, Chirico M, De Caceres M, Durand S, Evangelista H, FitzJohn R, Friendly M, Furneaux B, Hannigan G, Hill M, Lahti L, McGlinn D, Ouellette M, Ribeiro Cunha E, Smith T, Stier A, Ter Braak C, Weedon J (2022). vegan: Community Ecology Package. R package version 2.6-4, <https://CRAN.R-project.org/package=vegan>.
## http://microbiome.github.io
## Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Sciaini, and Cédric Scherer (2023). viridis(Lite) - Colorblind-Friendly Color Maps for R. viridis package version 0.6.4.
## "Simple statistical identification and removal of contaminant sequences in marker-gene and metagenomics data." bioRxiv_, 221499. doi:10.1101/221499 <https://doi.org/10.1101/221499>.
## Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## Plots. R package version 0.6.0, <https://CRAN.R-project.org/package=ggpubr>.
## Fitting Linear Mixed-Effects Models Using lme4. Journal of Statistical Software, 67(1), 1-48. doi:10.18637/jss.v067.i01.
## Package: Tests in Linear Mixed Effects Models." Journal of Statistical Software, *82*(13), 1-26. doi:10.18637/jss.v082.i13 <https://doi.org/10.18637/jss.v082.i13>.
## R package version 1.4.2, <https://CRAN.R-project.org/package=writexl>.
## Matrices and Other Utilities. R package version 0.2.3, <https://CRAN.R-project.org/package=harrietr>.
## Population-scale Meta-omics Studies, http://huttenhower.sph.harvard.edu/maaslin2. To cite the MaAsLin 2 software, please use: Mallick H, Rahnavard A, McIver LJ (2020). MaAsLin 2: Multivariable Association in Population-scale Meta-omics Studies. R/Bioconductor package, http://huttenhower.sph.harvard.edu/maaslin2.
## for 'ggplot2'. R package version 0.1.2, <https://CRAN.R-project.org/package=ggtext>.
## package version 0.5.3, <https://CRAN.R-project.org/package=ggpmisc>.
## Graphics. R package version 2.3, <https://CRAN.R-project.org/package=gridExtra>.
## using 'mgcv' and 'lme4'. R package version 0.2-6, <https://CRAN.R-project.org/package=gamm4>. ATTENTION: This citation information has been auto-generated from the package DESCRIPTION file and may need manual editing, see 'help("citation")'.
## Journal of Statistical Software, 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.
## Pipe Syntax. R package version 1.3.4, <https://CRAN.R-project.org/package=kableExtra>.
## Generation in R. R package version 1.43, <https://yihui.org/knitr/>. Yihui Xie (2015) Dynamic Documents with R and knitr. 2nd edition. Chapman and Hall/CRC. ISBN 978-1498716963 Yihui Xie (2014) knitr: A Comprehensive Tool for Reproducible Research in R. In Victoria Stodden, Friedrich Leisch and Roger D. Peng, editors, Implementing Reproducible Computational Research. Chapman and Hall/CRC. ISBN 978-1466561595
## Visualization of Phylogenetic Trees (1st edition). Chapman and Hall/CRC. doi:10.1201/9781003279242 Shuangbin Xu, Lin Li, Xiao Luo, Meijun Chen, Wenli Tang, Li Zhan, Zehan Dai, Tommy T. Lam, Yi Guan, Guangchuang Yu. Ggtree: A serialized data object for visualization of a phylogenetic tree and annotation data. iMeta 2022, 4(1):e56. doi:10.1002/imt2.56 Guangchuang Yu. Using ggtree to visualize data on tree-like structures. Current Protocols in Bioinformatics, 2020, 69:e96. doi: 10.1002/cpbi.96 Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods for mapping and visualizing associated data on phylogeny using ggtree. Molecular Biology and Evolution 2018, 35(2):3041-3043. doi: 10.1093/molbev/msy194 Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam. ggtree: an R package for visualization and annotation of phylogenetic trees with their covariates and other associated data. Methods in Ecology and Evolution 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## Third edition. Sage, Thousand Oaks CA. <https://socialsciences.mcmaster.ca/jfox/Books/Companion/>.
## Imai (2014). mediation: R Package for Causal Mediation Analysis. Journal of Statistical Software, 59(5), 1-38. URL http://www.jstatsoft.org/v59/i05/. For the underlying methods please cite the following papers: Kosuke Imai, Luke Keele and Teppei Yamamoto (2010). Identification, Inference and Sensitivity Analysis for Causal Mediation Effects. Statistical Science, 25(1), 51-71. Kosuke Imai, Luke Keele and Dustin Tingley (2010). A General Approach to Causal Mediation Analysis. Psychological Methods, 15(4), 309-334. Kosuke Imai, Luke Keele, Dustin Tingley and Teppei Yamamoto (2011). Unpacking the Black Box of Causality: Learning about Causal Mechanisms from Experimental and Observational Studies. American Political Science Review, 105(4), 765-789. Kosuke Imai and Teppei Yamamoto (2013). Identification and Sensitivity Analysis for Multiple Causal Mechanisms: Revisiting Evidence from Framing Experiments. Political Analysis, 21(2), 141-171. Kosuke Imai, Luke Keele, Dustin Tingley and Teppei Yamamoto (2010). Causal Mediation Analysis Using R. In Advances in Social Science Research Using R, ed. H. D. Vinod, New York: Springer-Verlag.
#===============================================================================